CARGA DE LIBRERIAS¶

In [2]:
library(gplots)
library(foreign)
library(car)
library(plotly)
library(ggpubr)
library(ggplot2)
library(reprex)
library(dplyr)
library(readxl)
library(readr)
library(rpart)
library(glmulti)
library(caret)
library(InformationValue)
library(rvest)
library(stringi)
library(stringr)
library(dplyr)
library(fmsb)
library(dlookr)
library(ggrepel)
library(rattle)
library(corrplot)
library(factoextra)
library(mclust)
library(FactoMineR)
library(tidyr)
library(GGally)
library(gridExtra)
library(grid)
library(FSelector)
library(mlbench)
library(RRF)
library(wsrf)
library(Boruta)
options(warn=-1)
In [3]:
options(repr.plot.width = 18, repr.plot.height = 10)

IMPORTACION Y LECTURA DE LOS DATOS¶

In [4]:
options(repr.matrix.max.rows=600, repr.matrix.max.cols=200)
In [145]:
datos <- read_excel("C:/Users/34625/Downloads/PROYECTOS DE TRABAJO/Yankel Carolina Sena/fwddatayankelsena/original_yankel.xlsx",sheet='Hoja12')
datos
A tibble: 59 × 34
STATUSCYCLES_BETWEEN_PET1_PET2GENDERAGETNM_STAGEDIFF_WBCDIFF_RBCDIFF_HBDIFF_PLTDIFF_CRPDIFF_ALBUMINDIFF_LDHDIFF_eGFRDIFF_ASTDIFF_ALTDIFF_KDIFF_BGLBMIDIFF_BWDIFF_SPLEEN_UPTAKEDIFF_BM_UPTAKEDIFF_LIVER_UPTAKEDIFF_ESTIMATED_SPLEEN_VOLDIAGNOSTICTREATMENTECOGPSCOMORBIDITIESCTCNCIACTION_TAKEN_TIME_BETWEE_PETdiasDIFF_SLRDIFF_BMLROVERALL_TIME
<chr><dbl><chr><dbl><chr><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><chr><chr><chr><chr><chr><chr><dbl><dbl><dbl><dbl><dbl>
ALIVE 3MALE 54III-1.83-0.64-2.20 -82 -0.56 0.98-271.0 23.7 0.30 13.20 0.10 1222.90 -4.00 0.02 0.12 0.44 -80.9GINECOLOGICAL ICI SYMPTOMATIC BUT AMBULATORY HYPERLIPIDEMIA SEVERE DRUG INTERRUPTED 2 25 0.029335236 0.109059370 2
ALIVE 2FEMALE79IV -3.08 0.14 0.70-159 -3.80 1.33 22.0 -0.2 4.40 5.30 0.58 -1416.47 0.30-0.20-0.60-0.30 6.2HEAD AND NECK ICI ASYMTOMATIC HYPERTENSION NO SIDE EFFECTS DRUG INTERRUPTED 4 0-0.156566790-0.04688961250
ALIVE 3MALE 60III-3.18-0.50 3.54 22 -0.06-0.20 64.2 4.6 -2.60 1.10 0.33 1220.15 -8.10-0.12 0.34 0.27 9.1LUNG CANCER ICI SYMPTOMATIC,<50% IN BED DURING THE DAYDIABETES MELLITUS MODERATE DOSE REDUCED 4 14-0.044698028-0.365688558 2
ALIVE 3MALE 76II -1.28 0.79 1.70 9 0.04-0.12 -55.0 -8.9 -0.70 -6.70 0.05 -422.83 0.00 0.46-0.40 0.59 -38.8RENAL CANCER ICI ASYMTOMATIC HYPERTENSION NO SIDE EFFECTS DOSE NOT CHANGED 6 0-0.002886671 0.13061297212
ALIVE 5FEMALE70II 0.00-0.09-0.40 -95 -7.23 0.05 6.0-13.2 2.70 2.20 0.06 -818.29 -0.10-0.34-0.80-0.48 -87.6MELANOMA ICI SYMPTOMATIC BUT AMBULATORY DIABETES MELLITUS NO SIDE EFFECTS DOSE NOT CHANGED 6 0-0.095215760-0.25268024712
ALIVE 4MALE 54IV 0.99 0.10 0.00 -3 0.02-0.16 -4.0-13.1 0.60 2.80-0.47 -327.18-10.80 0.00-0.10-0.53 34.4LUNG CANCER ICI ASYMTOMATIC CANCER RRECURRENCE HIGHT SEVERE DOSE NOT CHANGED 6363-0.167591661 0.07251378818
ALIVE 2MALE 60III-1.48-1.23-1.90 -60 0.02-0.46 -16.0 14.1 5.20 0.60-0.26 -1222.76 1.40-0.07 0.11-0.17 -85.0LUNG CANCER ICI ASYMTOMATIC CHRONIC INFLAMMATIONNO SIDE EFFECTS DOSE NOT CHANGED 9 0 0.114290017 0.200158109 7
ALIVE 5MALE 69III-1.65 0.04 0.60-354 4.66 0.52-333.0 -9.2 -7.30 -23.90-0.02 -7017.36 -2.20 0.61-0.18 0.61 11.3HEAD AND NECK ICI SYMPTOMATIC,<50% IN BED DURING THE DAYDIABETES MELLITUS MODERATE DOSE NOT CHANGED 9 95 0.223201621 0.31414097616
ALIVE 6MALE 64IV 3.89-0.78-3.20 242 15.21-1.22 -28.0 9.0 -10.40 -13.20-0.23 7023.65-11.10 0.30 0.50 0.03 8.1HEAD AND NECK ICI SYMPTOMATIC BUT AMBULATORY HYPERLIPIDEMIA SLIGTHLY SIDE EFFCTSDOSE NOT CHANGED 10 28 0.163665694 0.07051774565
ALIVE12MALE 72III-0.80-0.37-0.90 -26 -0.02-0.09 -13.0 -2.1 0.00 -1.90 0.26 6422.84 -6.60 0.24 0.48 0.65 -51.4HEAD AND NECK ICI ASYMTOMATIC CHRONIC INFLAMMATIONNO SIDE EFFECTS DOSE NOT CHANGED 16 0-0.117647059 0.083556150 1
ALIVE29MALE 71III-0.91 0.07 0.20 11 0.18 0.18 52.0 -8.7 -0.60 -0.10 0.59 1121.34 0.00-0.05 0.69 0.09 -46.1HEAD AND NECK ICI ASYMTOMATIC HYPERTENSION NO SIDE EFFECTS ADDED OTHER TREATMEN21 0 0.312678365 0.61977621441
ALIVE 6MALE 67III-2.67-0.86 2.30 -4 -0.36-0.72 69.0 16.9 5.00 3.80 0.05 1120.29 -1.10 0.20 0.70 0.52 66.9LUNG CANCER ICI ASYMTOMATIC CANCER RRECURRENCE NO SIDE EFFECTS DOSE NOT CHANGED 25 0-0.160987704-0.06144878811
ALIVE48MALE 60III 4.52 0.11-1.90 -63 17.83-0.61 -15.0-24.1 -4.10 -4.40-0.67 6429.11 -6.20 1.13 2.08 0.31 177.0RENAL CANCER ICI ASYMTOMATIC HYPERLIPIDEMIA SLIGTHLY SIDE EFFCTSUNKNOW 25735 0.413640470-0.03712737111
ALIVE20MALE 61IV 1.71 0.89 3.70-183 -1.24 1.74 86.0 -9.4 -0.50 1.80-0.17 -1120.12 0.00 0.15-0.22 0.57 -44.5MELANOMA ICI SYMPTOMATIC,<50% IN BED DURING THE DAYDIABETES MELLITUS NO SIDE EFFECTS UNKNOW 38 0 0.125985126 0.39696414715
DEATH 2MALE 83II 2.37-0.12 1.60 44 0.00 0.00 0.0 0.0 0.00 0.00 0.00 1424.46 -1.50 0.10 0.35-0.03 31.6MELANOMA ICI ASYMTOMATIC CANCER RRECURRENCE NO SIDE EFFECTS DOSE NOT CHANGED 11 0 0.253714576 0.288722186 4
DEATH 3MALE 68I 3.22-1.11 0.90 -59 3.02-0.09 5.0-19.6 -11.60 -7.50 0.17 624.41 -4.30-0.05-0.45 0.27 -2.1MELANOMA ICI SYMPTOMATIC,<50% IN BED DURING THE DAYCANCER RRECURRENCE NO SIDE EFFECTS DOSE NOT CHANGED 2 0 0.207638889 0.08194444413
DEATH18MALE 77II 0.37 0.63 1.90 -24 0.22-0.06 46.0 7.5 6.50 4.10 0.48 -1218.07 0.00-0.13 0.20-0.14 -2.4RENAL CANCER ICI ASYMTOMATIC CHRONIC INFLAMMATIONSEVERE ADDED STEROIDS 10 3-0.117647059 0.08355615012
DEATH 4MALE 60III-1.23-0.28-1.20 -11 2.69 0.10 194.0-10.2 10.30 7.50-0.01 123.14 4.00-0.42-0.40-0.74 55.2LUNG CANCER ICI SYMPTOMATIC BUT AMBULATORY CHRONIC INFLAMMATIONMODERATE UNKNOW 15441 0.030665281 0.13326403329
DEATH 7MALE 64IV 0.46-0.41-1.50 34 0.31-0.44 -8.0 -4.6 -1.10 0.40 0.44 -622.12 5.70 0.27 0.46-0.37 15.1HEAD AND NECK ICI SYMPTOMATIC,<50% IN BED DURING THE DAYDIABETES MELLITUS MODERATE DRUG INTERRUPTED 4 52 0.135997522 0.20012391642
DEATH15MALE 67II -4.64 0.71 2.40-189-15.88 1.57-510.0-24.5 -2.90 -16.90 0.00 -2714.08 44.02-0.40-0.19 0.19 2.4HEAD AND NECK ICI SYMPTOMATIC BUT AMBULATORY DIABETES MELLITUS SLIGTHLY SIDE EFFCTSDRUG INTERRUPTED 13364-0.513598150-0.14050021021
DEATH 5MALE 83IV 1.13-0.21-0.60 16 0.32 0.08 -9.0 -2.8 0.70 0.40 0.59 1620.07 5.90 0.22-0.21 0.34 -85.5HEAD AND NECK ICI SYMPTOMATIC,<50% IN BED DURING THE DAYHYPERTENSION NO SIDE EFFECTS DOSE NOT CHANGED 8 0-0.193627043-0.09373884921
DEATH 4FEMALE56II 7.74-0.33-0.80 463 -2.03 2.83-140.0 45.3-219.60-250.80-0.90 -1821.79 -2.30-0.16 0.26-0.84 34.0MELANOMA ICI SYMPTOMATIC BUT AMBULATORY HYPERLIPIDEMIA HIGHT SEVERE DOSE REDUCED 9233-0.045342594 0.034875129 8
ALIVE 7MALE 65II -1.68-0.68 1.00 0 7.55-0.13 8.0 -3.3 -6.90 -5.70 0.04 1418.18 12.20 0.31-0.54-0.85 22.3PANCREAS CANCER CHEMOASYMTOMATIC DIABETES MELLITUS NO SIDE EFFECTS UNKNOW 1 0 0.004147110 0.16218735927
ALIVE 5FEMALE55IV -3.77-1.93 1.40 -58 0.12 0.01 -29.0 6.7 -3.10 -10.20 0.48 2525.60 4.60 0.40 0.59-0.24 29.9GINECOLOGICAL CHEMOSYMPTOMATIC,<50% IN BED DURING THE DAYHYPERLIPIDEMIA SEVERE DRUG INTERRUPTED 2 20-0.230769231-0.33076923110
ALIVE 2FEMALE75III-2.17-0.94-2.30-100 0.00 0.27 21.0-10.4 1.80 -2.40-0.20 825.96 -5.40 0.19 0.65 0.88 -1.0LUNG CANCER CHEMOSYMPTOMATIC BUT AMBULATORY HYPERTENSION MODERATE DOSE REDUCED 2 15 0.071091909 0.51158680341
ALIVE 6MALE 74III-3.24-1.16-0.20 100 -0.46-0.56 41.0 3.0 4.40 0.80 0.68 920.85 9.30 0.00-0.20 0.60 19.0PANCREAS CANCER CHEMOASYMTOMATIC HYPERTENSION MODERATE DOSE REDUCED 3 35 0.509097867 0.149792993 3
ALIVE 3FEMALE76IV -0.04-0.43-0.80 95 0.02-0.10 103.0 -2.2 7.90 0.10-0.33 220.90 -2.40-0.12 0.84-0.27 19.7LUNG CANCER CHEMOASYMTOMATIC DIABETES MELLITUS NO SIDE EFFECTS DOSE NOT CHANGED 3 0 0.123854660 0.02860614932
ALIVE 3MALE 77III-5.70-1.29-2.60-229 0.18-0.21 45.0 -9.7 -19.70 -84.70-0.10 -1322.94 0.00 0.03-0.33 0.84 98.8LUNG CANCER CHEMOASYMTOMATIC HYPERTENSION HIGHT SEVERE DOSE REDUCED 3 6 0.047663153-0.21905644015
ALIVE 4MALE 68II -0.47 0.05 5.80 69 -2.74 1.18 67.0-16.2 1.20 -0.70 0.76 -2021.57 5.40 0.26 0.55 0.46 4.1LUNG CANCER CHEMOASYMTOMATIC HYPERTENSION NO SIDE EFFECTS DOSE NOT CHANGED 4 0-0.026468254 0.04327381014
ALIVE 2MALE 68II -2.47-0.73-1.00 -61 -0.10-0.12 25.0 13.4 -0.70 -2.10 0.19 -1335.81 -1.30-0.05 0.43-0.07 15.1LUNG CANCER CHEMOSYMPTOMATIC BUT AMBULATORY DIABETES MELLITUS NO SIDE EFFECTS DOSE NOT CHANGED 4 0-0.318764676-0.09074958415
ALIVE 3FEMALE86IV -4.78-1.30-3.50 -17 -6.38-0.12 -2.0 -3.0 -15.20 -19.30 0.23 -826.75 -2.40 0.30 0.19 0.35 41.0UROTHELIAL CARCINOMACHEMOSYMPTOMATIC,<50% IN BED DURING THE DAYHYPERTENSION SEVERE DOSE REDUCED 5 12 0.243005526 0.29071550524
ALIVE 2MALE 74II 1.13-0.30-1.70 -32 0.27-0.25 -12.0 1.2 -5.60 -5.60-0.07 2119.62 15.60 0.20-0.30-0.10 56.3PANCREAS CANCER CHEMOSYMPTOMATIC,<50% IN BED DURING THE DAYHYPERLIPIDEMIA SEVERE DOSE REDUCED 5 0-0.005084746-0.015906128 6
ALIVE 3FEMALE52I 3.14 0.03 0.40 -9 0.07 0.05 91.0 8.9 4.30 16.80 0.09 922.19 -4.80-0.37 0.77-0.08 42.4EWING SARCOMA CHEMOASYMTOMATIC DIABETES MELLITUS NO SIDE EFFECTS DOSE NOT CHANGED 5 0 0.087355212-0.17004504532
ALIVE 4FEMALE74IV 2.19 0.44 1.30 59 -0.01-0.24 40.0 1.9 -3.20 -8.60 0.14 2417.78 5.60-0.16-0.01-0.23 -1.8HEAD AND NECK CHEMOSYMPTOMATIC BUT AMBULATORY HYPERTENSION MODERATE DOSE NOT CHANGED 6 1 0.105820106-0.07804232823
ALIVE13FEMALE70IV -1.92-0.90-1.60-233 -1.94 1.15-294.0 -3.3 -18.70 -6.40-0.19 2024.33 -1.20 0.08-1.01 0.22 -55.6GINECOLOGICAL CHEMOSYMPTOMATIC,<50% IN BED DURING THE DAYHYPERLIPIDEMIA NO SIDE EFFECTS UNKNOW 7 0 0.083963069 0.15995795015
ALIVE 2MALE 62III 1.53-0.61-0.40 -19 -0.05-0.21 -5.0-12.8 0.10 -4.20 0.88 1916.24 0.80-0.08 0.08 0.01 5.8LUNG CANCER CHEMOASYMTOMATIC CHRONIC INFLAMMATIONNO SIDE EFFECTS DOSE NOT CHANGED 8 0-0.244661571-0.31368686563
ALIVE 3MALE 79IV 4.23 0.38 1.20 170 0.59-0.50 -66.0 3.3-111.50 -18.30 0.24-11820.64 8.90-0.66-0.76-1.59 -9.1RENAL CANCER CHEMOASYMTOMATIC CHRONIC INFLAMMATIONNO SIDE EFFECTS DOSE NOT CHANGED 9 0 0.202321083 0.496711799 9
ALIVE14FEMALE69II 3.39-1.31-2.20 -59 -0.09-0.60 16.0 34.3 -4.70 -13.80 0.10 -4118.57 0.80-0.39 0.06-0.70 99.4PANCREAS CANCER CHEMOSYMPTOMATIC >50 % IN THE BED ATEROESCLEROSIS NO SIDE EFFECTS DOSE REDUCED 9 0 0.061267406-0.157317420 4
ALIVE10FEMALE65II -2.68 0.18 1.80 316 4.78-1.70 14.0 47.7 4.70 -1.20-0.56 1516.51 15.10 0.22-0.36 0.02 46.9GINECOLOGICAL CHEMOSYMPTOMATIC,<50% IN BED DURING THE DAYHYPERLIPIDEMIA SEVERE DOSE REDUCED 10 32-0.054315003-0.25631754621
ALIVE 4MALE 74III 1.51 0.83 2.60-110 -0.01 0.22 -81.0-30.1 -11.50 -9.90 0.14 -722.08 -1.00 0.09-0.12 0.18 -18.1LUNG CANCER CHEMOASYMTOMATIC HYPERTENSION MODERATE DOSE NOT CHANGED 12 36-0.159000071-0.136205070 3
ALIVE12MALE 76III-0.49-0.11-0.40 -56 0.04-0.01 31.0-13.4 7.20 5.20 0.36 -5626.80 -2.20 0.24 0.44 0.63-110.2LUNG CANCER CHEMOSYMPTOMATIC BUT AMBULATORY ATEROESCLEROSIS SEVERE ADDED OTHER TREATMEN14 51-0.040974878 0.214845830 6
ALIVE 4FEMALE72III 2.00 0.84 2.40 -16 -0.01 0.22 1.0 1.4 -6.70 -4.20 0.06 215.43 4.50-0.80 0.90 0.14 18.0EWING SARCOMA CHEMOASYMTOMATIC CANCER RRECURRENCE NO SIDE EFFECTS ADDED OTHER TREATMEN17 0-0.292250233-0.61923436020
ALIVE14FEMALE76III 3.86 1.15 3.80 15 -3.88-1.50 30.0 -7.6 18.90 18.30-0.10 -2329.56 3.60 0.66 0.00 0.50 26.7GINECOLOGICAL CHEMOSYMPTOMATIC,<50% IN BED DURING THE DAYHYPERTENSION NO SIDE EFFECTS UNKNOW 19 0-0.314585877-0.18220899520
ALIVE 4FEMALE56III-3.11-0.22-0.50 4 -0.10-0.08 9.0-20.0 -2.90 -1.60-0.03 4318.03 -1.30-0.63 0.21 0.49 2.6LUNG CANCER CHEMOSYMPTOMATIC BUT AMBULATORY DIABETES MELLITUS NO SIDE EFFECTS UNKNOW 20 0 0.027777778-0.125000000 1
ALIVE 4MALE 70III-0.19-0.44-0.70 -17 -0.27 0.13 4.0 -5.7 -7.60 -12.00 0.36-16023.75 -5.60-0.55-1.20 0.10 31.0LUNG CANCER CHEMOSYMPTOMATIC,<50% IN BED DURING THE DAYCHRONIC INFLAMMATIONNO SIDE EFFECTS DOSE NOT CHANGED 20 0-0.095525727-0.514605305 3
ALIVE 4FEMALE75III-2.30 0.14 0.40 -82 0.02-0.31 16.0-25.1 -6.10 -9.40 0.73 025.78 -4.90 0.10 0.20-0.29 2.4GINECOLOGICAL CHEMOASYMTOMATIC HYPERTENSION SEVERE DOSE REDUCED 33312 0.048263534 0.141343207 5
DEATH 9MALE 81II 0.10-0.19-1.10 1 1.19-0.06 -4.0 -2.2 -5.50 -8.80 1.07 1616.17 0.00 0.34-0.82-0.65 -68.5GASTRIC CANCER CHEMOASYMTOMATIC CANCER RRECURRENCE MODERATE DRUG INTERRUPTED 9 86 0.226870727 0.43893871922
DEATH 9MALE 73III-1.94 0.13 0.40 75 -1.67-2.28 32.0-32.7 15.60 -2.40 0.51 -116.90 26.20 0.20 0.70 0.52 -3.1PANCREAS CANCER CHEMOSYMPTOMATIC BUT AMBULATORY HYPERLIPIDEMIA SEVERE DRUG INTERRUPTED 4195 0.065394796 0.09726844416
DEATH 5MALE 77I -2.19-1.97-5.20-151 0.39 0.12 34.0 3.8 11.90 19.40-0.64 3025.21 5.30-0.14-0.72 0.30 -7.6LUNG CANCER CHEMOASYMTOMATIC CHRONIC INFLAMMATIONNO SIDE EFFECTS DRUG WIHDRAWN 5 0-0.080303030-0.620707071 9
DEATH 6MALE 74I 5.48-0.77-2.80 15 7.21-0.37 472.0 6.8 -1.10 -2.60 0.35 2923.51 7.50 0.34 0.01-0.32 -5.6GASTRIC CANCER CHEMOASYMTOMATIC CANCER RRECURRENCE NO SIDE EFFECTS DOSE NOT CHANGED 12 0-0.022968112-0.097195217 1
DEATH 6MALE 70I 0.38 0.59 0.20 -9 0.25-0.05 18.0 -6.3 -0.80 -8.60 0.63 1716.68 18.70-0.07 0.09-0.01 33.1LUNG CANCER CHEMOSYMPTOMATIC BUT AMBULATORY HYPERLIPIDEMIA SEVERE DOSE REDUCED 12 50-0.136822783-0.32110706913
DEATH 6MALE 63II 2.42 0.49 4.00 26 2.96-1.35 170.0 9.9 -55.90 -84.90-0.34 -7017.89 24.10-0.06 0.21-0.25 84.2PANCREAS CANCER CHEMOSYMPTOMATIC,<50% IN BED DURING THE DAYDIABETES MELLITUS NO SIDE EFFECTS DOSE REDUCED 7 0-0.195751634-0.07072829164
DEATH 5FEMALE61II -0.96-1.44-2.70 -12 0.03-0.12 15.0 -0.9 6.60 14.30 0.91 2920.44 6.90-0.41 0.24 0.51 34.2GINECOLOGICAL CHEMOASYMTOMATIC HYPERLIPIDEMIA HIGHT SEVERE DOSE REDUCED 11114-0.101647059 0.00262745112
DEATH 2MALE 79III-2.67-0.86-3.50 4 1.03-0.90 -34.0-39.2 -6.50 -6.00 0.73 117.30 4.80 0.05 0.74-0.34 -20.2LUNG CANCER CHEMOSYMPTOMATIC BUT AMBULATORY CANCER RRECURRENCE SEVERE DOSE REDUCED 3 0-0.502580577 0.36923320017
DEATH 3FEMALE68III-1.29-0.52-1.60 79 -0.02 0.05 -23.0 9.5 -5.80 -9.20 0.14 1725.78 3.10 1.13 1.83-0.87 18.5SARCOMA CHEMOASYMTOMATIC ATEROESCLEROSIS MODERATE DRUG INTERRUPTED 9 7 0.688749725 1.02003222716
DEATH 6FEMALE67III-2.68-1.02 0.60-258 -0.22-0.17 -5.0 -2.6 -0.01 0.62 0.25 -619.07 8.70 0.12 0.39 0.77 2.7HEAD AND NECK CHEMOSYMPTOMATIC BUT AMBULATORY DIABETES MELLITUS NO SIDE EFFECTS DOSE REDUCED 1 0 0.217516526 0.28936650747
DEATH10MALE 87III 2.06-0.29-1.00 197 -2.64 0.20 -65.0 9.3 13.20 3.30-0.28 -123.76 6.60 0.05-0.45-0.38 65.2GASTRIC CANCER CHEMOSYMPTOMATIC BUT AMBULATORY HYPERTENSION SEVERE DOSE REDUCED 10 33-0.256666667-0.53833333330
DEATH 4MALE 78III-0.31-0.29-0.90 31 -0.15-0.16 5.0-26.4 -3.50 -10.70 0.48 -219.86 3.60 0.33 0.41-0.35 38.9LUNG CANCER CHEMOBEDBOUND CHRONIC INFLAMMATIONMODERATE DRUG INTERRUPTED 8 7-0.135169763 0.37681750232
DEATH 8FEMALE53I -0.36-0.45-1.20 -40 0.39-0.11 5.0 -5.9 1.70 -3.00 0.05 1819.26 4.60 0.08-0.13-0.16 24.0PANCREAS CANCER CHEMOSYMPTOMATIC BUT AMBULATORY HYPERLIPIDEMIA SLIGTHLY SIDE EFFCTSDOSE REDUCED 3 7 0.162247681-0.01971787123
In [42]:
datos<-as.data.frame(datos)
datos
A data.frame: 59 × 34
STATUSCYCLES_BETWEEN_PET1_PET2GENDERAGETNM_STAGEDIFF_WBCDIFF_RBCDIFF_HBDIFF_PLTDIFF_CRPDIFF_ALBUMINDIFF_LDHDIFF_eGFRDIFF_ASTDIFF_ALTDIFF_KDIFF_BGLBMIDIFF_BWDIFF_SPLEEN_UPTAKEDIFF_BM_UPTAKEDIFF_LIVER_UPTAKEDIFF_ESTIMATED_SPLEEN_VOLDIAGNOSTICTREATMENTECOGPSCOMORBIDITIESCTCNCIACTION_TAKEN_TIME_BETWEE_PETdiasDIFF_SLRDIFF_BMLROVERALL_TIME
<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
1 32543-1.83-0.64-2.20 -82 -0.56 0.98-271.0 23.7 0.30 13.20 0.10 1222.90 -4.00 0.02 0.12 0.44 -80.9 324545 2 25 0.029335236 0.109059370 2
1 21794-3.08 0.14 0.70-159 -3.80 1.33 22.0 -0.2 4.40 5.30 0.58 -1416.47 0.30-0.20-0.60-0.30 6.2 421635 4 0-0.156566790-0.04688961250
1 32603-3.18-0.50 3.54 22 -0.06-0.20 64.2 4.6 -2.60 1.10 0.33 1220.15 -8.10-0.12 0.34 0.27 9.1 525424 4 14-0.044698028-0.365688558 2
1 32762-1.28 0.79 1.70 9 0.04-0.12 -55.0 -8.9 -0.70 -6.70 0.05 -422.83 0.00 0.46-0.40 0.59 -38.8 821633 6 0-0.002886671 0.13061297212
1 51702 0.00-0.09-0.40 -95 -7.23 0.05 6.0-13.2 2.70 2.20 0.06 -818.29 -0.10-0.34-0.80-0.48 -87.6 624433 6 0-0.095215760-0.25268024712
1 42544 0.99 0.10 0.00 -3 0.02-0.16 -4.0-13.1 0.60 2.80-0.47 -327.18-10.80 0.00-0.10-0.53 34.4 521213 6363-0.167591661 0.07251378818
1 22603-1.48-1.23-1.90 -60 0.02-0.46 -16.0 14.1 5.20 0.60-0.26 -1222.76 1.40-0.07 0.11-0.17 -85.0 521333 9 0 0.114290017 0.200158109 7
1 52693-1.65 0.04 0.60-354 4.66 0.52-333.0 -9.2 -7.30 -23.90-0.02 -7017.36 -2.20 0.61-0.18 0.61 11.3 425423 9 95 0.223201621 0.31414097616
1 62644 3.89-0.78-3.20 242 15.21-1.22 -28.0 9.0 -10.40 -13.20-0.23 7023.65-11.10 0.30 0.50 0.03 8.1 42455310 28 0.163665694 0.07051774565
1122723-0.80-0.37-0.90 -26 -0.02-0.09 -13.0 -2.1 0.00 -1.90 0.26 6422.84 -6.60 0.24 0.48 0.65 -51.4 42133316 0-0.117647059 0.083556150 1
1292713-0.91 0.07 0.20 11 0.18 0.18 52.0 -8.7 -0.60 -0.10 0.59 1121.34 0.00-0.05 0.69 0.09 -46.1 42163121 0 0.312678365 0.61977621441
1 62673-2.67-0.86 2.30 -4 -0.36-0.72 69.0 16.9 5.00 3.80 0.05 1120.29 -1.10 0.20 0.70 0.52 66.9 52123325 0-0.160987704-0.06144878811
1482603 4.52 0.11-1.90 -63 17.83-0.61 -15.0-24.1 -4.10 -4.40-0.67 6429.11 -6.20 1.13 2.08 0.31 177.0 82155725735 0.413640470-0.03712737111
1202614 1.71 0.89 3.70-183 -1.24 1.74 86.0 -9.4 -0.50 1.80-0.17 -1120.12 0.00 0.15-0.22 0.57 -44.5 62543738 0 0.125985126 0.39696414715
2 22832 2.37-0.12 1.60 44 0.00 0.00 0.0 0.0 0.00 0.00 0.00 1424.46 -1.50 0.10 0.35-0.03 31.6 62123311 0 0.253714576 0.288722186 4
2 32681 3.22-1.11 0.90 -59 3.02-0.09 5.0-19.6 -11.60 -7.50 0.17 624.41 -4.30-0.05-0.45 0.27 -2.1 625233 2 0 0.207638889 0.08194444413
2182772 0.37 0.63 1.90 -24 0.22-0.06 46.0 7.5 6.50 4.10 0.48 -1218.07 0.00-0.13 0.20-0.14 -2.4 82134210 3-0.117647059 0.08355615012
2 42603-1.23-0.28-1.20 -11 2.69 0.10 194.0-10.2 10.30 7.50-0.01 123.14 4.00-0.42-0.40-0.74 55.2 52432715441 0.030665281 0.13326403329
2 72644 0.46-0.41-1.50 34 0.31-0.44 -8.0 -4.6 -1.10 0.40 0.44 -622.12 5.70 0.27 0.46-0.37 15.1 425425 4 52 0.135997522 0.20012391642
2152672-4.64 0.71 2.40-189-15.88 1.57-510.0-24.5 -2.90 -16.90 0.00 -2714.08 44.02-0.40-0.19 0.19 2.4 42445513364-0.513598150-0.14050021021
2 52834 1.13-0.21-0.60 16 0.32 0.08 -9.0 -2.8 0.70 0.40 0.59 1620.07 5.90 0.22-0.21 0.34 -85.5 425633 8 0-0.193627043-0.09373884921
2 41562 7.74-0.33-0.80 463 -2.03 2.83-140.0 45.3-219.60-250.80-0.90 -1821.79 -2.30-0.16 0.26-0.84 34.0 624514 9233-0.045342594 0.034875129 8
1 72652-1.68-0.68 1.00 0 7.55-0.13 8.0 -3.3 -6.90 -5.70 0.04 1418.18 12.20 0.31-0.54-0.85 22.3 711437 1 0 0.004147110 0.16218735927
1 51554-3.77-1.93 1.40 -58 0.12 0.01 -29.0 6.7 -3.10 -10.20 0.48 2525.60 4.60 0.40 0.59-0.24 29.9 315545 2 20-0.230769231-0.33076923110
1 21753-2.17-0.94-2.30-100 0.00 0.27 21.0-10.4 1.80 -2.40-0.20 825.96 -5.40 0.19 0.65 0.88 -1.0 514624 2 15 0.071091909 0.51158680341
1 62743-3.24-1.16-0.20 100 -0.46-0.56 41.0 3.0 4.40 0.80 0.68 920.85 9.30 0.00-0.20 0.60 19.0 711624 3 35 0.509097867 0.149792993 3
1 31764-0.04-0.43-0.80 95 0.02-0.10 103.0 -2.2 7.90 0.10-0.33 220.90 -2.40-0.12 0.84-0.27 19.7 511433 3 0 0.123854660 0.02860614932
1 32773-5.70-1.29-2.60-229 0.18-0.21 45.0 -9.7 -19.70 -84.70-0.10 -1322.94 0.00 0.03-0.33 0.84 98.8 511614 3 6 0.047663153-0.21905644015
1 42682-0.47 0.05 5.80 69 -2.74 1.18 67.0-16.2 1.20 -0.70 0.76 -2021.57 5.40 0.26 0.55 0.46 4.1 511633 4 0-0.026468254 0.04327381014
1 22682-2.47-0.73-1.00 -61 -0.10-0.12 25.0 13.4 -0.70 -2.10 0.19 -1335.81 -1.30-0.05 0.43-0.07 15.1 514433 4 0-0.318764676-0.09074958415
1 31864-4.78-1.30-3.50 -17 -6.38-0.12 -2.0 -3.0 -15.20 -19.30 0.23 -826.75 -2.40 0.30 0.19 0.35 41.01015644 5 12 0.243005526 0.29071550524
1 22742 1.13-0.30-1.70 -32 0.27-0.25 -12.0 1.2 -5.60 -5.60-0.07 2119.62 15.60 0.20-0.30-0.10 56.3 715544 5 0-0.005084746-0.015906128 6
1 31521 3.14 0.03 0.40 -9 0.07 0.05 91.0 8.9 4.30 16.80 0.09 922.19 -4.80-0.37 0.77-0.08 42.4 111433 5 0 0.087355212-0.17004504532
1 41744 2.19 0.44 1.30 59 -0.01-0.24 40.0 1.9 -3.20 -8.60 0.14 2417.78 5.60-0.16-0.01-0.23 -1.8 414623 6 1 0.105820106-0.07804232823
1131704-1.92-0.90-1.60-233 -1.94 1.15-294.0 -3.3 -18.70 -6.40-0.19 2024.33 -1.20 0.08-1.01 0.22 -55.6 315537 7 0 0.083963069 0.15995795015
1 22623 1.53-0.61-0.40 -19 -0.05-0.21 -5.0-12.8 0.10 -4.20 0.88 1916.24 0.80-0.08 0.08 0.01 5.8 511333 8 0-0.244661571-0.31368686563
1 32794 4.23 0.38 1.20 170 0.59-0.50 -66.0 3.3-111.50 -18.30 0.24-11820.64 8.90-0.66-0.76-1.59 -9.1 811333 9 0 0.202321083 0.496711799 9
1141692 3.39-1.31-2.20 -59 -0.09-0.60 16.0 34.3 -4.70 -13.80 0.10 -4118.57 0.80-0.39 0.06-0.70 99.4 713134 9 0 0.061267406-0.157317420 4
1101652-2.68 0.18 1.80 316 4.78-1.70 14.0 47.7 4.70 -1.20-0.56 1516.51 15.10 0.22-0.36 0.02 46.9 31554410 32-0.054315003-0.25631754621
1 42743 1.51 0.83 2.60-110 -0.01 0.22 -81.0-30.1 -11.50 -9.90 0.14 -722.08 -1.00 0.09-0.12 0.18 -18.1 51162312 36-0.159000071-0.136205070 3
1122763-0.49-0.11-0.40 -56 0.04-0.01 31.0-13.4 7.20 5.20 0.36 -5626.80 -2.20 0.24 0.44 0.63-110.2 51414114 51-0.040974878 0.214845830 6
1 41723 2.00 0.84 2.40 -16 -0.01 0.22 1.0 1.4 -6.70 -4.20 0.06 215.43 4.50-0.80 0.90 0.14 18.0 11123117 0-0.292250233-0.61923436020
1141763 3.86 1.15 3.80 15 -3.88-1.50 30.0 -7.6 18.90 18.30-0.10 -2329.56 3.60 0.66 0.00 0.50 26.7 31563719 0-0.314585877-0.18220899520
1 41563-3.11-0.22-0.50 4 -0.10-0.08 9.0-20.0 -2.90 -1.60-0.03 4318.03 -1.30-0.63 0.21 0.49 2.6 51443720 0 0.027777778-0.125000000 1
1 42703-0.19-0.44-0.70 -17 -0.27 0.13 4.0 -5.7 -7.60 -12.00 0.36-16023.75 -5.60-0.55-1.20 0.10 31.0 51533320 0-0.095525727-0.514605305 3
1 41753-2.30 0.14 0.40 -82 0.02-0.31 16.0-25.1 -6.10 -9.40 0.73 025.78 -4.90 0.10 0.20-0.29 2.4 31164433312 0.048263534 0.141343207 5
2 92812 0.10-0.19-1.10 1 1.19-0.06 -4.0 -2.2 -5.50 -8.80 1.07 1616.17 0.00 0.34-0.82-0.65 -68.5 211225 9 86 0.226870727 0.43893871922
2 92733-1.94 0.13 0.40 75 -1.67-2.28 32.0-32.7 15.60 -2.40 0.51 -116.90 26.20 0.20 0.70 0.52 -3.1 714545 4195 0.065394796 0.09726844416
2 52771-2.19-1.97-5.20-151 0.39 0.12 34.0 3.8 11.90 19.40-0.64 3025.21 5.30-0.14-0.72 0.30 -7.6 511336 5 0-0.080303030-0.620707071 9
2 62741 5.48-0.77-2.80 15 7.21-0.37 472.0 6.8 -1.10 -2.60 0.35 2923.51 7.50 0.34 0.01-0.32 -5.6 21123312 0-0.022968112-0.097195217 1
2 62701 0.38 0.59 0.20 -9 0.25-0.05 18.0 -6.3 -0.80 -8.60 0.63 1716.68 18.70-0.07 0.09-0.01 33.1 51454412 50-0.136822783-0.32110706913
2 62632 2.42 0.49 4.00 26 2.96-1.35 170.0 9.9 -55.90 -84.90-0.34 -7017.89 24.10-0.06 0.21-0.25 84.2 715434 7 0-0.195751634-0.07072829164
2 51612-0.96-1.44-2.70 -12 0.03-0.12 15.0 -0.9 6.60 14.30 0.91 2920.44 6.90-0.41 0.24 0.51 34.2 31151411114-0.101647059 0.00262745112
2 22793-2.67-0.86-3.50 4 1.03-0.90 -34.0-39.2 -6.50 -6.00 0.73 117.30 4.80 0.05 0.74-0.34 -20.2 514244 3 0-0.502580577 0.36923320017
2 31683-1.29-0.52-1.60 79 -0.02 0.05 -23.0 9.5 -5.80 -9.20 0.14 1725.78 3.10 1.13 1.83-0.87 18.5 911125 9 7 0.688749725 1.02003222716
2 61673-2.68-1.02 0.60-258 -0.22-0.17 -5.0 -2.6 -0.01 0.62 0.25 -619.07 8.70 0.12 0.39 0.77 2.7 414434 1 0 0.217516526 0.28936650747
2102873 2.06-0.29-1.00 197 -2.64 0.20 -65.0 9.3 13.20 3.30-0.28 -123.76 6.60 0.05-0.45-0.38 65.2 21464410 33-0.256666667-0.53833333330
2 42783-0.31-0.29-0.90 31 -0.15-0.16 5.0-26.4 -3.50 -10.70 0.48 -219.86 3.60 0.33 0.41-0.35 38.9 512325 8 7-0.135169763 0.37681750232
2 81531-0.36-0.45-1.20 -40 0.39-0.11 5.0 -5.9 1.70 -3.00 0.05 1819.26 4.60 0.08-0.13-0.16 24.0 714554 3 7 0.162247681-0.01971787123

Exploracion inicial¶

In [6]:
# Tipos de varibales                    
aux = as.data.frame(t(t(sapply(datos, class))))
colnames(aux) <- "Tipo de Variable"
aux$Numero_Columna = seq(nrow(aux))
aux
A data.frame: 34 × 2
Tipo de VariableNumero_Columna
<chr><int>
STATUScharacter 1
CYCLES_BETWEEN_PET1_PET2numeric 2
GENDERcharacter 3
AGEnumeric 4
TNM_STAGEcharacter 5
DIFF_WBCnumeric 6
DIFF_RBCnumeric 7
DIFF_HBnumeric 8
DIFF_PLTnumeric 9
DIFF_CRPnumeric 10
DIFF_ALBUMINnumeric 11
DIFF_LDHnumeric 12
DIFF_eGFRnumeric 13
DIFF_ASTnumeric 14
DIFF_ALTnumeric 15
DIFF_Knumeric 16
DIFF_BGLnumeric 17
BMInumeric 18
DIFF_BWnumeric 19
DIFF_SPLEEN_UPTAKEnumeric 20
DIFF_BM_UPTAKEnumeric 21
DIFF_LIVER_UPTAKEnumeric 22
DIFF_ESTIMATED_SPLEEN_VOLnumeric 23
DIAGNOSTICcharacter24
TREATMENTcharacter25
ECOGPScharacter26
COMORBIDITIEScharacter27
CTCNCIcharacter28
ACTION_TAKEN_character29
TIME_BETWEE_PETnumeric 30
diasnumeric 31
DIFF_SLRnumeric 32
DIFF_BMLRnumeric 33
OVERALL_TIMEnumeric 34
In [7]:
overview(datos)  

plot(overview(datos))
A overview: 16 × 3
divisionmetricsvalue
<chr><chr><dbl>
size observations 59
size variables 34
size values 2006
size memory size 23968
duplicatedduplicate observation 0
missing complete observation 59
missing missing observation 0
missing missing variables 0
missing missing values 0
data type numerics 25
data type integers 0
data type factors/ordered 0
data type characters 9
data type Dates 0
data type POSIXcts 0
data type others 0

ORDENAR Y LIMPIAR DATOS¶

No quitamos ni outliers ni extremos ya que son pocas observaciones y ademas son mediciones reales

Transformar tipos de variables¶

In [143]:
# pasar variables a factor
datos$STATUS <- as.factor(datos$STATUS)
datos$GENDER <- as.factor(datos$GENDER)
datos$COMORBIDITIES <- as.factor(datos$COMORBIDITIES)
datos$CTCNCI <- as.factor(datos$CTCNCI)
datos$ACTION_TAKEN_ <- as.factor(datos$ACTION_TAKEN_)
datos$TNM_STAGE <- as.factor(datos$TNM_STAGE)
datos$DIAGNOSTIC <- as.factor(datos$DIAGNOSTIC)
datos$TREATMENT <- as.factor(datos$TREATMENT)
datos$ECOGPS <- as.factor(datos$ECOGPS)
In [9]:
# vemos los cambios
aux = as.data.frame(t(t(sapply(datos, class))))
colnames(aux) <- "Tipo de Variable"
aux$Numero_Columna = seq(nrow(aux))
aux
A data.frame: 34 × 2
Tipo de VariableNumero_Columna
<chr><int>
STATUSfactor 1
CYCLES_BETWEEN_PET1_PET2numeric 2
GENDERfactor 3
AGEnumeric 4
TNM_STAGEfactor 5
DIFF_WBCnumeric 6
DIFF_RBCnumeric 7
DIFF_HBnumeric 8
DIFF_PLTnumeric 9
DIFF_CRPnumeric10
DIFF_ALBUMINnumeric11
DIFF_LDHnumeric12
DIFF_eGFRnumeric13
DIFF_ASTnumeric14
DIFF_ALTnumeric15
DIFF_Knumeric16
DIFF_BGLnumeric17
BMInumeric18
DIFF_BWnumeric19
DIFF_SPLEEN_UPTAKEnumeric20
DIFF_BM_UPTAKEnumeric21
DIFF_LIVER_UPTAKEnumeric22
DIFF_ESTIMATED_SPLEEN_VOLnumeric23
DIAGNOSTICfactor 24
TREATMENTfactor 25
ECOGPSfactor 26
COMORBIDITIESfactor 27
CTCNCIfactor 28
ACTION_TAKEN_factor 29
TIME_BETWEE_PETnumeric30
diasnumeric31
DIFF_SLRnumeric32
DIFF_BMLRnumeric33
OVERALL_TIMEnumeric34

Identificar la variable respuesta¶

In [10]:
# Identificar variable respuesta
respuesta <- c("STATUS")
respuesta
'STATUS'

Identificar las variables cuantitativas de entrada¶

In [11]:
# Identificar variables numericas de entrada
tipos_var <- t(t(sapply(datos, class)))
var_num <- colnames(datos)[tipos_var=="integer"|tipos_var=="numeric"]
var_num <- var_num[var_num!=respuesta]
as.data.frame(var_num)
A data.frame: 25 × 1
var_num
<chr>
CYCLES_BETWEEN_PET1_PET2
AGE
DIFF_WBC
DIFF_RBC
DIFF_HB
DIFF_PLT
DIFF_CRP
DIFF_ALBUMIN
DIFF_LDH
DIFF_eGFR
DIFF_AST
DIFF_ALT
DIFF_K
DIFF_BGL
BMI
DIFF_BW
DIFF_SPLEEN_UPTAKE
DIFF_BM_UPTAKE
DIFF_LIVER_UPTAKE
DIFF_ESTIMATED_SPLEEN_VOL
TIME_BETWEE_PET
dias
DIFF_SLR
DIFF_BMLR
OVERALL_TIME

Identificar las variables cualitativas de entrada¶

In [12]:
# Identificar variables cualitativas
tipos_var <- t(t(sapply(datos, class)))
var_cual <- colnames(datos)[tipos_var=="character"|tipos_var=="factor"]
var_cual <- var_cual[var_cual!=respuesta]
as.data.frame(var_cual)
A data.frame: 8 × 1
var_cual
<chr>
GENDER
TNM_STAGE
DIAGNOSTIC
TREATMENT
ECOGPS
COMORBIDITIES
CTCNCI
ACTION_TAKEN_

EXPLORACION UNIVARIADA DE LOS DATOS¶

Describir la variable respuesta¶

In [13]:
# Tabla de frecuencias:

as.data.frame(table(datos$STATUS))
A data.frame: 2 × 2
Var1Freq
<fct><int>
ALIVE38
DEATH21
In [14]:
# Barplot
barplot(table(datos$STATUS))

Descripcion variables cuantitativas¶

Descripcion estadistica variables cuantitativas de entrada¶

In [15]:
# Descripcion de variables cuantitativas:
d_uni<-dlookr::describe(datos)
d_uni
A tibble: 25 × 26
described_variablesnnameansdse_meanIQRskewnesskurtosisp00p01p05p10p20p25p30p40p50p60p70p75p80p90p95p99p100
<chr><int><int><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
CYCLES_BETWEEN_PET1_PET2 590 7.06779661 7.4506755 0.96999533 4.5000000 3.546136216.079740747 2.0000000 2.0000000 2.0000000 2.0000000 3.0000000 3.0000000 3.40000000 4.00000000 5.000000000 5.80000000 6.00000000 7.5000000 9.4000000 14.0000000 18.2000000 36.9800000 48.0000000
AGE 590 69.38983051 8.6223786 1.1225380712.5000000-0.1966440-0.589411859 52.0000000 52.5800000 54.0000000 56.0000000 61.0000000 63.5000000 65.00000000 68.00000000 70.00000000072.8000000074.6000000076.000000076.4000000 79.0000000 83.0000000 86.4200000 87.0000000
DIFF_WBC 590 -0.17525424 2.7263955 0.35494646 3.8000000 0.5057711 0.129045347 -5.7000000 -5.1664000 -3.8570000 -3.1240000 -2.5500000 -2.1800000 -1.88400000 -1.27000000 -0.470000000 0.08000000 1.13000000 1.6200000 2.1120000 3.4840000 4.2590000 6.4292000 7.7400000
DIFF_RBC 590 -0.31101695 0.7000066 0.09113310 0.8950000-0.1435844-0.229988897 -1.9700000 -1.9468000 -1.3230000 -1.2420000 -0.8760000 -0.7750000 -0.66400000 -0.43800000 -0.290000000-0.13400000 0.06200000 0.1200000 0.1560000 0.6460000 0.8310000 0.9992000 1.1500000
DIFF_HB 590 -0.09593220 2.1214367 0.27618753 2.8000000 0.3373839 0.254904377 -5.2000000 -4.2140000 -3.2300000 -2.6200000 -1.7800000 -1.5500000 -1.16000000 -0.80000000 -0.400000000 0.36000000 0.82000000 1.2500000 1.6400000 2.4400000 3.7100000 4.7560000 5.8000000
DIFF_PLT 590-13.27118644129.064516116.8027688084.5000000 0.7237460 3.418977080-354.0000000-298.3200000-229.4000000-163.8000000-82.0000000-60.5000000-58.60000000-23.00000000-11.000000000 0.8000000015.0000000024.000000038.0000000 96.0000000201.5000000377.7400000463.0000000
DIFF_CRP 590 0.33372881 4.4082997 0.57391174 0.5600000 0.9022780 8.168553321 -15.8800000 -10.8630000 -4.1300000 -2.6600000 -0.5000000 -0.2450000 -0.10000000 -0.01800000 0.020000000 0.04000000 0.23800000 0.3150000 0.4700000 3.3480000 7.2440000 16.3104000 17.8300000
DIFF_ALBUMIN 590 -0.05762712 0.7953146 0.10354114 0.3900000 0.6459202 3.439907176 -2.2800000 -1.9436000 -1.3650000 -0.7560000 -0.4480000 -0.2800000 -0.21000000 -0.12800000 -0.100000000-0.05200000 0.05000000 0.1100000 0.1880000 1.0140000 1.3540000 2.1978000 2.8300000
DIFF_LDH 590 -2.79322034125.857905516.3853036547.0000000-0.8048823 7.846212457-510.0000000-407.3400000-273.3000000 -69.0000000-25.0000000-14.0000000 -8.60000000 -1.60000000 5.00000000015.8000000028.0000000033.000000042.6000000 72.4000000109.7000000310.7600000472.0000000
DIFF_eGFR 590 -2.49830508 16.4224247 2.1380175815.9500000 0.6730682 1.790030255 -39.2000000 -35.4300000 -26.7700000 -24.1800000-13.1400000-10.3000000 -9.32000000 -5.86000000 -2.800000000-0.34000000 3.18000000 5.6500000 8.0600000 13.5400000 24.7600000 46.3080000 47.7000000
DIFF_AST 590 -7.27813559 33.1784402 4.31946500 9.4500000-5.248454130.880490044-219.6000000-156.9020000 -23.3200000 -12.3200000 -6.7800000 -5.9500000 -5.18000000 -2.90000000 -0.700000000 0.00000000 1.50000000 3.5000000 4.5200000 7.3400000 12.0300000 16.9860000 18.9000000
DIFF_ALT 590 -9.48949153 36.3504299 4.73242289 9.9500000-5.493682834.708950221-250.8000000-154.5780000 -29.9800000 -17.1800000-10.0200000 -9.0000000 -8.16000000 -5.36000000 -2.400000000-0.80000000 0.52000000 0.9500000 2.4400000 5.7400000 14.5500000 18.7620000 19.4000000
DIFF_K 590 0.14796610 0.4109064 0.05349546 0.5450000-0.1046069-0.003291498 -0.9000000 -0.7666000 -0.5680000 -0.3320000 -0.1780000 -0.0850000 -0.01600000 0.05000000 0.100000000 0.22200000 0.35600000 0.4600000 0.4920000 0.6900000 0.7720000 0.9772000 1.0700000
DIFF_BGL 590 -1.27118644 37.1413101 4.8353867128.0000000-1.8484106 6.368310476-160.0000000-135.6400000 -70.0000000 -29.8000000-13.0000000-11.5000000 -7.60000000 -1.80000000 2.00000000010.6000000014.6000000016.500000018.4000000 29.0000000 45.1000000 66.5200000 70.0000000
BMI 590 21.54457627 4.0359947 0.52544175 5.5200000 0.7826975 1.399355371 14.0800000 14.8630000 16.2330000 16.6460000 17.9740000 18.2350000 19.14600000 20.17800000 21.34000000022.6460000023.3620000023.755000024.4300000 26.1180000 27.3730000 32.1850000 35.8100000
DIFF_BW 590 3.00033898 9.1860843 1.19592631 7.8500000 2.0129510 6.438370916 -11.1000000 -10.9260000 -6.7500000 -5.4400000 -2.4000000 -2.2000000 -1.30000000 0.00000000 0.300000000 3.60000000 5.10000000 5.6500000 6.7200000 12.7800000 19.2400000 33.6844000 44.0200000
DIFF_SPLEEN_UPTAKE 590 0.05067797 0.3633743 0.04730731 0.3650000 0.4149319 1.664027013 -0.8000000 -0.7188000 -0.5580000 -0.4020000 -0.1600000 -0.1250000 -0.07600000 -0.04000000 0.050000000 0.11600000 0.21200000 0.2400000 0.2820000 0.3520000 0.6150000 1.1300000 1.1300000
DIFF_BM_UPTAKE 590 0.10711864 0.6017591 0.07834236 0.7100000 0.6111772 1.721229694 -1.2000000 -1.0898000 -0.8020000 -0.6240000 -0.3760000 -0.2600000 -0.19600000 -0.00800000 0.110000000 0.21000000 0.40200000 0.4500000 0.5200000 0.7080000 0.8460000 1.9350000 2.0800000
DIFF_LIVER_UPTAKE 590 0.01779661 0.5004070 0.06514744 0.7450000-0.5918683 0.488986741 -1.5900000 -1.1724000 -0.8410000 -0.6600000 -0.3440000 -0.2950000 -0.24600000 -0.09600000 0.020000000 0.18800000 0.32800000 0.4500000 0.5040000 0.6020000 0.6620000 0.8568000 0.8800000
DIFF_ESTIMATED_SPLEEN_VOL590 8.61694915 50.0321902 6.5136363540.1500000 0.1688579 1.731668976-110.2000000 -97.0920000 -85.0500000 -58.1800000-18.9400000 -6.6000000 -2.28000000 2.62000000 9.10000000018.9000000030.5600000033.550000036.2000000 58.0800000 85.6600000131.9920000177.0000000
TIME_BETWEE_PET 590 9.62711864 7.6404060 0.99469613 8.0000000 1.6870456 3.317725303 1.0000000 1.0000000 2.0000000 2.8000000 4.0000000 4.0000000 5.00000000 6.00000000 8.000000000 9.0000000010.6000000012.000000013.4000000 20.0000000 25.0000000 35.1000000 38.0000000
dias 590 57.15254237133.928289317.4359781334.0000000 3.294971712.075154478 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.00000000 0.00000000 0.000000000 7.0000000026.8000000034.000000050.4000000202.6000000363.1000000564.4800000735.0000000
DIFF_SLR 590 0.00266275 0.2193508 0.02855705 0.2609162 0.3392356 1.204043701 -0.5135982 -0.5072080 -0.3150038 -0.2470626 -0.1597951 -0.1359963 -0.11124706 -0.04521368 -0.002886671 0.04814346 0.09843415 0.1249199 0.1628149 0.2300977 0.3227746 0.5845516 0.6887497
DIFF_BMLR 590 0.02945905 0.3024081 0.03937018 0.3195083 0.3246088 1.244142990 -0.6207071 -0.6198529 -0.5169781 -0.3230395 -0.1749106 -0.1383526 -0.09581267 -0.04493716 0.034875129 0.08355615 0.14641308 0.1811556 0.2443964 0.3808468 0.4981993 0.7878837 1.0200322
OVERALL_TIME 590 19.10169492 15.8415592 2.0623953415.0000000 1.3537447 1.563910333 1.0000000 1.0000000 1.9000000 3.0000000 6.0000000 8.5000000 10.40000000 12.20000000 15.00000000017.8000000021.6000000023.500000029.4000000 41.2000000 51.3000000 64.4200000 65.0000000

Descripcion grafica variables numericas¶

In [16]:
for (i in 1:length(var_num)) {
  name_i <- as.name(var_num[i])
  r <- list()
  
  # Histograma de densidad
  r[[1]]<-ggplot(datos, aes(x=!!name_i)) +
    geom_density(fill="#69b3a2", color="black", alpha=0.6)+
    theme_light() +
    #theme(legend.position = "none") +
    xlab("") +
    ylab("Densidad de Frecuencia")
  
  # Boxplot
  r[[2]]<-ggplot(data=datos, aes(y=!!name_i)) +
    geom_boxplot(size = 0.4) +
    theme_light() +
    theme(legend.position = "none")+
    xlab("")
  
  # Qqplot
  r[[3]]<-ggqqplot(datos, x = var_num[i],color = "#FF6666",add.params = list(color = "black"))+
    xlab("") + ylab("Cuartiles reales") +
    theme_minimal() +
    #ggtitle("qqplot") +
    theme(plot.title = element_text(hjust = 0.5))
  
  grid.arrange(r[[1]], r[[2]], r[[3]],
               nrow=1, ncol = 3,
               top = textGrob(var_num[i],gp=gpar(fontsize=16,font=3)))
}

Analisis de outliers(podemos ver algun dato que nos resulte interesante)¶

In [17]:
# Analisis de outliers:
diag_out <- diagnose_outlier(datos)
diag_out
A tibble: 25 × 6
variablesoutliers_cntoutliers_ratiooutliers_meanwith_meanwithout_mean
<chr><int><dbl><dbl><dbl><dbl>
CYCLES_BETWEEN_PET1_PET2 5 8.474576 26.00000000 7.06779661 5.314814815
AGE 0 0.000000 NaN 69.38983051 69.389830508
DIFF_WBC 1 1.694915 7.74000000 -0.17525424 -0.311724138
DIFF_RBC 0 0.000000 NaN -0.31101695 -0.311016949
DIFF_HB 1 1.694915 5.80000000 -0.09593220 -0.197586207
DIFF_PLT 1016.949153 12.50000000-13.27118644-18.530612245
DIFF_CRP 2135.593220 0.84142857 0.33372881 0.053157895
DIFF_ALBUMIN 1322.033898 0.14076923 -0.05762712 -0.113695652
DIFF_LDH 813.559322-89.00000000 -2.79322034 10.729411765
DIFF_eGFR 4 6.779661 22.02500000 -2.49830508 -4.281818182
DIFF_AST 4 6.779661-92.02500000 -7.27813559 -1.114727273
DIFF_ALT 610.169492-60.98333333 -9.48949153 -3.660000000
DIFF_K 0 0.000000 NaN 0.14796610 0.147966102
DIFF_BGL 813.559322-34.50000000 -1.27118644 3.941176471
BMI 1 1.694915 35.81000000 21.54457627 21.298620690
DIFF_BW 4 6.779661 28.25500000 3.00033898 1.163636364
DIFF_SPLEEN_UPTAKE 3 5.084746 0.48666667 0.05067797 0.027321429
DIFF_BM_UPTAKE 2 3.389831 1.95500000 0.10711864 0.042280702
DIFF_LIVER_UPTAKE 1 1.694915 -1.59000000 0.01779661 0.045517241
DIFF_ESTIMATED_SPLEEN_VOL 915.254237-15.83333333 8.61694915 13.018000000
TIME_BETWEE_PET 4 6.779661 30.25000000 9.62711864 8.127272727
dias 1016.949153293.80000000 57.15254237 8.857142857
DIFF_SLR 1 1.694915 0.68874973 0.00266275 -0.009166336
DIFF_BMLR 3 5.084746 -0.07330307 0.02945905 0.034964164
OVERALL_TIME 5 8.474576 57.80000000 19.10169492 15.518518519
In [18]:
# Plot analisis de outliers:
plot_outlier(datos)
In [23]:
# Plot de normalidad:
plot_normality(datos)

Correlacion variables cuantitativas¶

In [25]:
vec_var_num = c('CYCLES_BETWEEN_PET1_PET2',
                'AGE',
                'DIFF_WBC',
                'DIFF_RBC',
                'DIFF_HB',
                'DIFF_PLT',
                'DIFF_CRP',
                'DIFF_ALBUMIN',
                'DIFF_LDH',
                'DIFF_eGFR',
                'DIFF_AST',
                'DIFF_ALT',
                'DIFF_K',
                'DIFF_BGL',
                'BMI',
                'DIFF_BW',
                'DIFF_SPLEEN_UPTAKE',
                'DIFF_BM_UPTAKE',
                'DIFF_LIVER_UPTAKE',
                'DIFF_ESTIMATED_SPLEEN_VOL',
                'TIME_BETWEE_PET',
                'dias',
                'DIFF_SLR',
                'DIFF_BMLR',
                'OVERALL_TIME')

options(repr.plot.width=16, repr.plot.height=10)

plot_correlate(datos[,vec_var_num],method = "spearman")
In [26]:
# coeficiente de correlación
library(Hmisc)
vec_var_num = c('CYCLES_BETWEEN_PET1_PET2',
                'AGE',
                'DIFF_WBC',
                'DIFF_RBC',
                'DIFF_HB',
                'DIFF_PLT',
                'DIFF_CRP',
                'DIFF_ALBUMIN',
                'DIFF_LDH',
                'DIFF_eGFR',
                'DIFF_AST',
                'DIFF_ALT',
                'DIFF_K',
                'DIFF_BGL',
                'BMI',
                'DIFF_BW',
                'DIFF_SPLEEN_UPTAKE',
                'DIFF_BM_UPTAKE',
                'DIFF_LIVER_UPTAKE',
                'DIFF_ESTIMATED_SPLEEN_VOL',
                'TIME_BETWEE_PET',
                'dias',
                'DIFF_SLR',
                'DIFF_BMLR',
                'OVERALL_TIME')

# Correlaciones por parejas:

flattenCorrMatrix <- function(cormat, pmat) {
  ut <- upper.tri(cormat)
  data.frame(
    row = rownames(cormat)[row(cormat)[ut]],
    column = rownames(cormat)[col(cormat)[ut]],
    cor  =(cormat)[ut],
    p = pmat[ut]
  )
}


# Calculamos la matriz de correlacion y visualizamos la correlacion de mayor intensidad
tcor<-rcorr(as.matrix(datos[,vec_var_num]),type = "spearman")
corr_data <- flattenCorrMatrix(tcor$r, tcor$P) %>%
    arrange(desc(abs(cor))) #%>%
corr_data
options(warn = -1)
Loading required package: survival


Attaching package: 'survival'


The following object is masked from 'package:caret':

    cluster


Loading required package: Formula


Attaching package: 'Hmisc'


The following object is masked from 'package:dlookr':

    describe


The following objects are masked from 'package:dplyr':

    src, summarize


The following object is masked from 'package:plotly':

    subplot


The following objects are masked from 'package:base':

    format.pval, units


A data.frame: 300 × 4
rowcolumncorp
<chr><chr><dbl><dbl>
DIFF_AST DIFF_ALT 0.8500328780.000000e+00
DIFF_RBC DIFF_HB 0.6514146302.309223e-08
DIFF_SLR DIFF_BMLR 0.5876559819.917998e-07
BMI DIFF_BW -0.4918642267.608448e-05
DIFF_RBC TIME_BETWEE_PET 0.4567783902.763295e-04
CYCLES_BETWEEN_PET1_PET2 TIME_BETWEE_PET 0.4059972841.420657e-03
DIFF_LDH DIFF_AST 0.4037410501.519114e-03
DIFF_LDH DIFF_ALT 0.3910216142.197503e-03
DIFF_SPLEEN_UPTAKE DIFF_BMLR 0.3614163834.916694e-03
DIFF_PLT DIFF_ALBUMIN -0.3443760877.566172e-03
DIFF_WBC DIFF_LIVER_UPTAKE -0.3419691398.026443e-03
DIFF_WBC DIFF_RBC 0.3342977389.660078e-03
DIFF_PLT DIFF_LIVER_UPTAKE -0.3321691611.016160e-02
DIFF_CRP DIFF_BGL 0.3321103881.017576e-02
DIFF_HB DIFF_BGL -0.3242026501.224414e-02
DIFF_CRP DIFF_ALBUMIN -0.3127566621.587521e-02
DIFF_ESTIMATED_SPLEEN_VOLDIFF_BMLR -0.3122196411.606611e-02
DIFF_WBC TIME_BETWEE_PET 0.3091961721.717791e-02
DIFF_HB BMI -0.3088602811.730539e-02
DIFF_WBC DIFF_PLT 0.3067424601.812788e-02
DIFF_RBC BMI -0.3014451252.033234e-02
BMI DIFF_SPLEEN_UPTAKE 0.2984158562.169234e-02
DIFF_K DIFF_ESTIMATED_SPLEEN_VOL-0.2957518052.295131e-02
DIFF_RBC DIFF_BGL -0.2850399052.865490e-02
DIFF_PLT DIFF_eGFR 0.2675317474.051234e-02
DIFF_RBC DIFF_eGFR -0.2655288814.209569e-02
DIFF_ALBUMIN DIFF_ESTIMATED_SPLEEN_VOL-0.2632494124.395935e-02
TIME_BETWEE_PET OVERALL_TIME -0.2510153515.515436e-02
DIFF_K BMI -0.2459957946.037113e-02
AGE DIFF_K 0.2432698656.336676e-02
DIFF_WBC DIFF_CRP 0.2401063916.699187e-02
DIFF_eGFR DIFF_LIVER_UPTAKE -0.2396364546.754429e-02
CYCLES_BETWEEN_PET1_PET2 dias 0.2324762157.642104e-02
DIFF_HB DIFF_SLR -0.2321384237.686165e-02
CYCLES_BETWEEN_PET1_PET2 DIFF_RBC 0.2314696647.773990e-02
BMI OVERALL_TIME -0.2297120038.008611e-02
DIFF_eGFR DIFF_ESTIMATED_SPLEEN_VOL 0.2254562468.599905e-02
DIFF_PLT DIFF_BW 0.2246622868.713923e-02
DIFF_PLT DIFF_BM_UPTAKE 0.2186760199.612251e-02
CYCLES_BETWEEN_PET1_PET2 DIFF_SPLEEN_UPTAKE 0.2181789079.689979e-02
CYCLES_BETWEEN_PET1_PET2 DIFF_BW 0.2171889189.846228e-02
DIFF_eGFR DIFF_K -0.2156719491.008944e-01
DIFF_CRP DIFF_SLR 0.2135498131.043747e-01
DIFF_SPLEEN_UPTAKE DIFF_SLR 0.2124715051.061782e-01
AGE DIFF_ESTIMATED_SPLEEN_VOL-0.2121448561.067293e-01
DIFF_ALBUMIN DIFF_SPLEEN_UPTAKE -0.2109724391.087254e-01
AGE DIFF_BM_UPTAKE -0.2099920871.104165e-01
DIFF_ALBUMIN DIFF_BW -0.2085118481.130081e-01
DIFF_PLT DIFF_ESTIMATED_SPLEEN_VOL 0.2083467291.133001e-01
DIFF_ESTIMATED_SPLEEN_VOLdias 0.2022497551.244907e-01
DIFF_BW DIFF_SLR -0.2019122821.251337e-01
DIFF_LDH dias -0.2011258121.266422e-01
DIFF_LDH DIFF_SPLEEN_UPTAKE -0.1997895421.292366e-01
DIFF_BW DIFF_BM_UPTAKE -0.1994064401.299878e-01
DIFF_BW OVERALL_TIME 0.1983380421.321001e-01
DIFF_CRP DIFF_LIVER_UPTAKE -0.1971122951.345553e-01
DIFF_CRP DIFF_SPLEEN_UPTAKE 0.1955889451.376543e-01
DIFF_eGFR DIFF_BMLR -0.1950670681.387282e-01
DIFF_CRP DIFF_AST -0.1925409571.440151e-01
DIFF_SPLEEN_UPTAKE DIFF_LIVER_UPTAKE 0.1919417791.452910e-01
DIFF_AST DIFF_LIVER_UPTAKE 0.1908672461.476000e-01
DIFF_eGFR dias -0.1879068781.541026e-01
DIFF_HB DIFF_LDH 0.1875685081.548592e-01
DIFF_BGL DIFF_SPLEEN_UPTAKE 0.1860318951.583293e-01
DIFF_RBC DIFF_SLR -0.1841693821.626119e-01
DIFF_ALT DIFF_LIVER_UPTAKE 0.1838213811.634214e-01
DIFF_LIVER_UPTAKE DIFF_ESTIMATED_SPLEEN_VOL-0.1825009501.665199e-01
DIFF_K DIFF_BW 0.1822315941.671572e-01
DIFF_HB DIFF_CRP -0.1812043261.696041e-01
DIFF_ESTIMATED_SPLEEN_VOLOVERALL_TIME 0.1793803741.740129e-01
AGE dias -0.1766211151.808401e-01
DIFF_LIVER_UPTAKE OVERALL_TIME -0.1764757911.812049e-01
DIFF_SPLEEN_UPTAKE DIFF_BM_UPTAKE 0.1750529791.848053e-01
DIFF_LDH DIFF_ESTIMATED_SPLEEN_VOL 0.1740972931.872523e-01
DIFF_BW DIFF_LIVER_UPTAKE -0.1726608431.909740e-01
AGE DIFF_BGL -0.1719562401.928189e-01
BMI DIFF_SLR 0.1699348321.981820e-01
DIFF_AST DIFF_SLR -0.1679379272.035837e-01
DIFF_WBC DIFF_K -0.1656802152.098155e-01
DIFF_ALBUMIN DIFF_LDH -0.1648253692.122098e-01
DIFF_ALBUMIN DIFF_BM_UPTAKE -0.1625823802.185832e-01
DIFF_CRP DIFF_ALT -0.1615859022.214572e-01
DIFF_BGL DIFF_BM_UPTAKE 0.1581847442.314647e-01
AGE DIFF_BMLR 0.1578016822.326111e-01
DIFF_HB TIME_BETWEE_PET 0.1565497792.363851e-01
DIFF_SPLEEN_UPTAKE dias 0.1544087992.429364e-01
AGE DIFF_SPLEEN_UPTAKE 0.1540060502.441826e-01
DIFF_HB DIFF_BMLR -0.1522015532.498195e-01
DIFF_LDH DIFF_BMLR -0.1519464582.506234e-01
DIFF_ALT DIFF_ESTIMATED_SPLEEN_VOL-0.1514947842.520512e-01
CYCLES_BETWEEN_PET1_PET2 DIFF_AST 0.1497723432.575466e-01
DIFF_BMLR OVERALL_TIME 0.1482527052.624617e-01
DIFF_BM_UPTAKE DIFF_BMLR 0.1481990212.626365e-01
DIFF_BGL DIFF_SLR 0.1467635192.673391e-01
DIFF_RBC DIFF_PLT 0.1458235922.704486e-01
CYCLES_BETWEEN_PET1_PET2 DIFF_HB 0.1454848622.715751e-01
DIFF_HB DIFF_BW 0.1447684062.739680e-01
DIFF_ALT DIFF_SPLEEN_UPTAKE -0.1417422962.842295e-01
CYCLES_BETWEEN_PET1_PET2 DIFF_WBC 0.1416959742.843886e-01
DIFF_K DIFF_BMLR 0.1410825992.864997e-01
DIFF_AST DIFF_BW 0.1408938912.871513e-01
DIFF_BM_UPTAKE DIFF_ESTIMATED_SPLEEN_VOL 0.1389327262.939806e-01
AGE DIFF_eGFR -0.1379649272.973895e-01
DIFF_ALT DIFF_BGL 0.1374557922.991930e-01
DIFF_RBC DIFF_ALBUMIN 0.1371958793.001165e-01
DIFF_BM_UPTAKE DIFF_LIVER_UPTAKE 0.1359704243.044954e-01
DIFF_LDH DIFF_BM_UPTAKE 0.1349720153.080934e-01
DIFF_PLT OVERALL_TIME 0.1333479833.140042e-01
DIFF_WBC DIFF_AST -0.1332359173.144148e-01
DIFF_PLT DIFF_BGL 0.1328305433.159027e-01
DIFF_HB dias -0.1326262343.166543e-01
DIFF_BW TIME_BETWEE_PET -0.1320820563.186617e-01
DIFF_AST DIFF_BMLR -0.1313782223.212701e-01
AGE DIFF_BW 0.1296819683.276121e-01
BMI DIFF_BM_UPTAKE 0.1285599483.318502e-01
DIFF_eGFR DIFF_SPLEEN_UPTAKE -0.1283100503.327988e-01
DIFF_PLT DIFF_LDH 0.1253891013.440128e-01
DIFF_ALT BMI 0.1243479403.480660e-01
DIFF_ALT DIFF_SLR -0.1227113993.544966e-01
DIFF_ALT dias -0.1208046433.620803e-01
CYCLES_BETWEEN_PET1_PET2 BMI -0.1205068573.632736e-01
CYCLES_BETWEEN_PET1_PET2 DIFF_LIVER_UPTAKE 0.1180038533.733978e-01
TIME_BETWEE_PET dias 0.1173197933.761940e-01
DIFF_WBC DIFF_eGFR 0.1168373833.781735e-01
DIFF_ALBUMIN DIFF_BGL -0.1167477843.785418e-01
BMI DIFF_BMLR 0.1160758643.813109e-01
DIFF_BGL DIFF_BMLR -0.1158967043.820512e-01
DIFF_WBC dias -0.1157090553.828276e-01
TIME_BETWEE_PET DIFF_SLR -0.1156942153.828891e-01
AGE DIFF_RBC 0.1141946723.891276e-01
DIFF_eGFR DIFF_BGL 0.1140259883.898332e-01
DIFF_K DIFF_BM_UPTAKE 0.1118597013.989608e-01
BMI DIFF_LIVER_UPTAKE 0.1111094884.021508e-01
DIFF_RBC dias 0.1100647114.066180e-01
DIFF_HB DIFF_LIVER_UPTAKE 0.1096627124.083445e-01
DIFF_RBC OVERALL_TIME 0.1095936744.086415e-01
DIFF_RBC DIFF_ESTIMATED_SPLEEN_VOL-0.1093252294.097972e-01
DIFF_BW DIFF_BMLR -0.1080832314.151691e-01
DIFF_HB DIFF_PLT 0.1080614994.152635e-01
DIFF_CRP OVERALL_TIME 0.1069741774.200000e-01
DIFF_eGFR DIFF_ALT 0.1063167584.228788e-01
DIFF_RBC DIFF_BW 0.1056140504.259682e-01
DIFF_LDH DIFF_K 0.1056093074.259891e-01
DIFF_CRP DIFF_BMLR 0.1039179304.334778e-01
DIFF_ALBUMIN DIFF_eGFR -0.1006195574.482927e-01
DIFF_PLT DIFF_CRP 0.1002426054.500035e-01
dias DIFF_BMLR 0.0987589014.567719e-01
DIFF_ALBUMIN DIFF_ALT 0.0980492444.600288e-01
dias DIFF_SLR 0.0963130544.680500e-01
AGE DIFF_PLT 0.0955042784.718122e-01
DIFF_CRP dias 0.0931896094.826683e-01
DIFF_HB DIFF_ALBUMIN 0.0925219584.858240e-01
DIFF_WBC DIFF_BMLR -0.0923477074.866494e-01
BMI dias 0.0914683964.908258e-01
AGE DIFF_ALT -0.0910368244.928824e-01
DIFF_CRP DIFF_BW 0.0907708584.941521e-01
DIFF_eGFR OVERALL_TIME -0.0906857954.945586e-01
DIFF_LDH DIFF_LIVER_UPTAKE 0.0895500585.000018e-01
DIFF_ALT DIFF_BMLR -0.0895422075.000396e-01
TIME_BETWEE_PET DIFF_BMLR -0.0893887575.007774e-01
DIFF_BM_UPTAKE OVERALL_TIME 0.0890959005.021871e-01
DIFF_AST DIFF_BM_UPTAKE 0.0878085135.084083e-01
DIFF_PLT DIFF_SPLEEN_UPTAKE 0.0865498455.145285e-01
CYCLES_BETWEEN_PET1_PET2 AGE -0.0860422555.170072e-01
CYCLES_BETWEEN_PET1_PET2 DIFF_LDH 0.0853302845.204939e-01
DIFF_WBC DIFF_ESTIMATED_SPLEEN_VOL 0.0830824985.315790e-01
DIFF_LDH OVERALL_TIME 0.0828702785.326315e-01
DIFF_eGFR DIFF_BW 0.0826303835.338226e-01
DIFF_SPLEEN_UPTAKE OVERALL_TIME 0.0817051905.384283e-01
CYCLES_BETWEEN_PET1_PET2 DIFF_ALT 0.0816847755.385302e-01
DIFF_BW DIFF_ESTIMATED_SPLEEN_VOL 0.0805824525.440434e-01
DIFF_AST DIFF_ESTIMATED_SPLEEN_VOL-0.0805412285.442501e-01
DIFF_HB DIFF_eGFR -0.0798345745.477993e-01
DIFF_WBC DIFF_ALT -0.0797679585.481345e-01
DIFF_CRP DIFF_K -0.0779608005.572641e-01
DIFF_AST DIFF_BGL 0.0771692925.612853e-01
DIFF_ALBUMIN DIFF_LIVER_UPTAKE 0.0764667205.648661e-01
DIFF_ALBUMIN DIFF_SLR -0.0759687885.674104e-01
DIFF_BM_UPTAKE dias 0.0755893845.693526e-01
BMI DIFF_ESTIMATED_SPLEEN_VOL 0.0740221225.774081e-01
DIFF_AST OVERALL_TIME 0.0729221675.830927e-01
DIFF_PLT BMI -0.0705945965.952046e-01
DIFF_HB DIFF_K 0.0703605725.964286e-01
DIFF_K DIFF_SLR -0.0697959915.993859e-01
DIFF_K TIME_BETWEE_PET -0.0697870355.994329e-01
DIFF_ESTIMATED_SPLEEN_VOLDIFF_SLR -0.0693756486.015919e-01
AGE DIFF_HB -0.0689256986.039573e-01
AGE DIFF_CRP -0.0683989896.067312e-01
DIFF_BGL OVERALL_TIME -0.0675831896.110387e-01
DIFF_LDH DIFF_BW 0.0673879746.120714e-01
DIFF_PLT DIFF_AST 0.0669871846.141939e-01
DIFF_LIVER_UPTAKE DIFF_SLR 0.0656370096.213673e-01
DIFF_AST DIFF_SPLEEN_UPTAKE -0.0651945176.237260e-01
DIFF_eGFR TIME_BETWEE_PET -0.0633908806.333787e-01
DIFF_ALBUMIN TIME_BETWEE_PET 0.0631583796.346275e-01
DIFF_CRP DIFF_ESTIMATED_SPLEEN_VOL 0.0629840726.355644e-01
DIFF_K OVERALL_TIME -0.0621380626.401196e-01
DIFF_SPLEEN_UPTAKE TIME_BETWEE_PET -0.0617301786.423206e-01
DIFF_HB DIFF_SPLEEN_UPTAKE -0.0610649096.459169e-01
DIFF_RBC DIFF_BM_UPTAKE -0.0606715966.480469e-01
DIFF_WBC DIFF_HB 0.0606330756.482557e-01
DIFF_WBC BMI 0.0602305776.504386e-01
DIFF_WBC DIFF_ALBUMIN -0.0599108646.521746e-01
DIFF_RBC DIFF_LIVER_UPTAKE -0.0596908066.533706e-01
DIFF_eGFR DIFF_AST 0.0575428146.650898e-01
CYCLES_BETWEEN_PET1_PET2 DIFF_BM_UPTAKE -0.0573460386.661674e-01
DIFF_PLT DIFF_K 0.0554199616.767505e-01
DIFF_RBC DIFF_LDH 0.0544197816.822710e-01
DIFF_ALT DIFF_BM_UPTAKE 0.0536575396.864893e-01
DIFF_RBC DIFF_BMLR -0.0527492446.915283e-01
DIFF_PLT DIFF_SLR 0.0521093636.950862e-01
DIFF_BW dias -0.0512964506.996157e-01
CYCLES_BETWEEN_PET1_PET2 DIFF_BGL 0.0507686937.025619e-01
CYCLES_BETWEEN_PET1_PET2 DIFF_CRP 0.0507347707.027514e-01
DIFF_WBC DIFF_BW -0.0505263237.039164e-01
CYCLES_BETWEEN_PET1_PET2 DIFF_ALBUMIN -0.0498307987.078084e-01
CYCLES_BETWEEN_PET1_PET2 DIFF_K -0.0477883427.192803e-01
DIFF_HB DIFF_ALT 0.0477874567.192853e-01
DIFF_WBC DIFF_SLR 0.0474012607.214615e-01
DIFF_RBC DIFF_AST 0.0463797767.272280e-01
DIFF_BGL DIFF_LIVER_UPTAKE 0.0457016767.310644e-01
DIFF_PLT dias 0.0456591417.313053e-01
DIFF_LDH DIFF_eGFR 0.0434445877.438808e-01
DIFF_PLT TIME_BETWEE_PET 0.0430351237.462134e-01
CYCLES_BETWEEN_PET1_PET2 DIFF_BMLR -0.0424772307.493951e-01
DIFF_HB DIFF_AST 0.0418548247.529498e-01
DIFF_CRP DIFF_eGFR 0.0411229167.571363e-01
DIFF_RBC DIFF_CRP -0.0408896957.584718e-01
DIFF_BM_UPTAKE DIFF_SLR 0.0406517137.598352e-01
DIFF_PLT DIFF_BMLR 0.0401852917.625096e-01
DIFF_WBC OVERALL_TIME 0.0387781757.705944e-01
AGE DIFF_SLR -0.0387226347.709141e-01
DIFF_RBC DIFF_K -0.0378505567.759376e-01
DIFF_LDH DIFF_BGL -0.0377099267.767485e-01
CYCLES_BETWEEN_PET1_PET2 OVERALL_TIME 0.0373613867.787594e-01
DIFF_ALBUMIN BMI 0.0373487647.788323e-01
DIFF_RBC DIFF_SPLEEN_UPTAKE -0.0367252707.824331e-01
DIFF_AST BMI 0.0362382377.852491e-01
DIFF_WBC DIFF_SPLEEN_UPTAKE -0.0345331527.951290e-01
DIFF_CRP DIFF_LDH 0.0345205567.952021e-01
DIFF_LDH BMI -0.0341214647.975193e-01
DIFF_LIVER_UPTAKE TIME_BETWEE_PET 0.0339222337.986768e-01
CYCLES_BETWEEN_PET1_PET2 DIFF_eGFR -0.0331213378.033339e-01
DIFF_LIVER_UPTAKE dias -0.0330194138.039271e-01
BMI TIME_BETWEE_PET 0.0323984818.075431e-01
DIFF_eGFR DIFF_BM_UPTAKE 0.0311244258.149751e-01
DIFF_BM_UPTAKE TIME_BETWEE_PET 0.0310962898.151394e-01
DIFF_HB DIFF_ESTIMATED_SPLEEN_VOL-0.0303375728.195732e-01
DIFF_BGL BMI 0.0302091418.203243e-01
DIFF_LDH TIME_BETWEE_PET 0.0299990548.215533e-01
DIFF_HB DIFF_BM_UPTAKE -0.0295940148.239239e-01
DIFF_eGFR BMI 0.0295157668.243821e-01
DIFF_WBC DIFF_LDH -0.0284374048.307019e-01
AGE DIFF_LIVER_UPTAKE 0.0280000778.332679e-01
AGE OVERALL_TIME 0.0260430428.447718e-01
CYCLES_BETWEEN_PET1_PET2 DIFF_ESTIMATED_SPLEEN_VOL-0.0257246488.466465e-01
DIFF_ALBUMIN DIFF_K -0.0255458458.476997e-01
DIFF_PLT DIFF_ALT -0.0254705408.481433e-01
DIFF_SPLEEN_UPTAKE DIFF_ESTIMATED_SPLEEN_VOL-0.0253985898.485672e-01
DIFF_BGL DIFF_ESTIMATED_SPLEEN_VOL-0.0249620028.511404e-01
DIFF_ALBUMIN dias 0.0239104188.573446e-01
DIFF_BGL dias 0.0234601858.600035e-01
DIFF_BW DIFF_SPLEEN_UPTAKE -0.0229708618.628950e-01
DIFF_CRP TIME_BETWEE_PET -0.0224416838.660241e-01
DIFF_AST DIFF_K 0.0219945648.686696e-01
dias OVERALL_TIME 0.0217613548.700499e-01
CYCLES_BETWEEN_PET1_PET2 DIFF_SLR 0.0216143248.709204e-01
AGE TIME_BETWEE_PET 0.0201627838.795222e-01
DIFF_ESTIMATED_SPLEEN_VOLTIME_BETWEE_PET 0.0200949678.799244e-01
DIFF_CRP DIFF_BM_UPTAKE -0.0197439578.820068e-01
DIFF_HB OVERALL_TIME 0.0197277058.821032e-01
DIFF_K DIFF_SPLEEN_UPTAKE 0.0190885448.858971e-01
DIFF_SLR OVERALL_TIME 0.0175902958.948001e-01
DIFF_ALT DIFF_BW -0.0175731028.949023e-01
DIFF_K DIFF_BGL 0.0166635009.003139e-01
DIFF_AST dias 0.0165726649.008546e-01
DIFF_CRP BMI -0.0162650349.026860e-01
DIFF_ALT OVERALL_TIME -0.0158797509.049803e-01
DIFF_ALBUMIN OVERALL_TIME -0.0151196159.095093e-01
AGE BMI 0.0149507119.105160e-01
DIFF_AST TIME_BETWEE_PET 0.0142662949.145968e-01
DIFF_LDH DIFF_SLR 0.0132686469.205491e-01
DIFF_ALBUMIN DIFF_AST 0.0131513589.212492e-01
DIFF_eGFR DIFF_SLR 0.0119816489.282341e-01
AGE DIFF_LDH 0.0109287289.345263e-01
AGE DIFF_AST 0.0107086509.358419e-01
DIFF_BGL DIFF_BW -0.0106893239.359575e-01
DIFF_LIVER_UPTAKE DIFF_BMLR -0.0100530419.397623e-01
DIFF_ALT DIFF_K 0.0094699469.432503e-01
DIFF_RBC DIFF_ALT 0.0084750729.492039e-01
AGE DIFF_WBC -0.0081484039.511594e-01
DIFF_ALT TIME_BETWEE_PET 0.0070305159.578533e-01
DIFF_K DIFF_LIVER_UPTAKE 0.0056410489.661775e-01
DIFF_WBC DIFF_BGL -0.0046914749.718684e-01
DIFF_ALBUMIN DIFF_BMLR 0.0035215389.788819e-01
DIFF_BGL TIME_BETWEE_PET 0.0028567319.828679e-01
AGE DIFF_ALBUMIN -0.0026625339.840324e-01
CYCLES_BETWEEN_PET1_PET2 DIFF_PLT -0.0016650299.900142e-01
DIFF_WBC DIFF_BM_UPTAKE -0.0014758759.911486e-01
DIFF_K dias 0.0014583149.912539e-01

Descripcion variables cualitativas de entrada¶

In [20]:
# Todas la tablas de frecuencias:
all_var <- univar_category(datos) 
all_var
$STATUS
A tibble: 2 × 3
STATUSnrate
<fct><int><dbl>
ALIVE380.6440678
DEATH210.3559322
$GENDER
A tibble: 2 × 3
GENDERnrate
<fct><int><dbl>
FEMALE200.3389831
MALE 390.6610169
$TNM_STAGE
A tibble: 4 × 3
TNM_STAGEnrate
<fct><int><dbl>
I 60.1016949
II 150.2542373
III260.4406780
IV 120.2033898
$DIAGNOSTIC
A tibble: 10 × 3
DIAGNOSTICnrate
<fct><int><dbl>
EWING SARCOMA 20.03389831
GASTRIC CANCER 30.05084746
GINECOLOGICAL 70.11864407
HEAD AND NECK 100.16949153
LUNG CANCER 190.32203390
MELANOMA 50.08474576
PANCREAS CANCER 70.11864407
RENAL CANCER 40.06779661
SARCOMA 10.01694915
UROTHELIAL CARCINOMA 10.01694915
$TREATMENT
A tibble: 2 × 3
TREATMENTnrate
<fct><int><dbl>
CHEMO370.6271186
ICI 220.3728814
$ECOGPS
A tibble: 5 × 3
ECOGPSnrate
<fct><int><dbl>
ASYMTOMATIC 260.44067797
BEDBOUND 10.01694915
SYMPTOMATIC >50 % IN THE BED 10.01694915
SYMPTOMATIC BUT AMBULATORY 170.28813559
SYMPTOMATIC,<50% IN BED DURING THE DAY140.23728814
$COMORBIDITIES
A tibble: 6 × 3
COMORBIDITIESnrate
<fct><int><dbl>
ATEROESCLEROSIS 30.05084746
CANCER RRECURRENCE 80.13559322
CHRONIC INFLAMMATION 90.15254237
DIABETES MELLITUS 130.22033898
HYPERLIPIDEMIA 120.20338983
HYPERTENSION 140.23728814
$CTCNCI
A tibble: 5 × 3
CTCNCInrate
<fct><int><dbl>
HIGHT SEVERE 40.06779661
MODERATE 110.18644068
NO SIDE EFFECTS 280.47457627
SEVERE 120.20338983
SLIGTHLY SIDE EFFCTS 40.06779661
$ACTION_TAKEN_
A tibble: 7 × 3
ACTION_TAKEN_nrate
<fct><int><dbl>
ADDED OTHER TREATMEN 30.05084746
ADDED STEROIDS 10.01694915
DOSE NOT CHANGED 210.35593220
DOSE REDUCED 170.28813559
DRUG INTERRUPTED 90.15254237
DRUG WIHDRAWN 10.01694915
UNKNOW 70.11864407
In [21]:
# Diagrama de barras por separado variables cualitativas:
plot(all_var)
In [22]:
# Todos los diagramas de barra juntos:
plot_bar_category(datos)
In [27]:
# Tabla de frecuencias + diagrama de barras + diagrama de sectores en una iteración:

vec_var_cual <- c('GENDER','TNM_STAGE','DIAGNOSTIC','TREATMENT','ECOGPS','COMORBIDITIES','CTCNCI','ACTION_TAKEN_')

for (i in vec_var_cual) {
    
    # 0. Seleccionar la variable con el contador i:
    #***************************************************************    
    var_i = i
    
    print("********************************************")
    print(var_i)
    print("********************************************")

    # 1. Creo la tabla de frecuencias relativas y absolutas:
    #***************************************************************

    # Tabla de frecuencias absolutas:
    tabla_frec = data.frame(t(table(datos[,var_i])))[,2:3]
    colnames(tabla_frec)<-c("Grupos","Frec_Absoluta")

    # Tabla de frecuencias relativas:
    tabla_frec$Frec_Relativa <- tabla_frec$Frec_Absoluta/sum(tabla_frec$Frec_Absoluta)
    print(tabla_frec)


    # 3. Crear el diagrama de sectores
    #***************************************************************

    data <- data.frame(
      group=tabla_frec$Grupos,
      value=tabla_frec$Frec_Relativa
    )

    
    data <- data %>% 
      arrange(desc(group)) %>%
      mutate(prop = value / sum(data$value) *100) %>%
      mutate(ypos = cumsum(prop)- 0.5*prop )

   
    p2 <- ggplot(data, aes(x="", y=prop, fill=group)) +
      geom_bar(stat="identity", width=1, color="white") +
      coord_polar("y", start=0) +
      theme_void() + 
      theme(legend.position="none") +

      geom_text(aes(y = ypos, label = group), color = "white", size=6) +
      scale_fill_brewer(palette="Set1")

    
    
    # 2. Crear el barplot frecuencia Absoluta
    #***************************************************************
    p1 <- ggplot(data=tabla_frec, aes(x=Grupos, y=Frec_Absoluta)) +
      geom_bar(stat="identity", fill="steelblue")+
      geom_text(aes(label=Frec_Relativa), vjust=1.6, color="black", size=5)+
      theme_minimal()

   
      grid.arrange(p1, p2,
               nrow=2, ncol = 1,
               top = textGrob(var_i,gp=gpar(fontsize=16,font=3)))    
    
    
}
[1] "********************************************"
[1] "GENDER"
[1] "********************************************"
  Grupos Frec_Absoluta Frec_Relativa
1 FEMALE            20     0.3389831
2   MALE            39     0.6610169
[1] "********************************************"
[1] "TNM_STAGE"
[1] "********************************************"
  Grupos Frec_Absoluta Frec_Relativa
1      I             6     0.1016949
2     II            15     0.2542373
3    III            26     0.4406780
4     IV            12     0.2033898
[1] "********************************************"
[1] "DIAGNOSTIC"
[1] "********************************************"
                 Grupos Frec_Absoluta Frec_Relativa
1         EWING SARCOMA             2    0.03389831
2        GASTRIC CANCER             3    0.05084746
3         GINECOLOGICAL             7    0.11864407
4         HEAD AND NECK            10    0.16949153
5           LUNG CANCER            19    0.32203390
6              MELANOMA             5    0.08474576
7       PANCREAS CANCER             7    0.11864407
8          RENAL CANCER             4    0.06779661
9               SARCOMA             1    0.01694915
10 UROTHELIAL CARCINOMA             1    0.01694915
[1] "********************************************"
[1] "TREATMENT"
[1] "********************************************"
  Grupos Frec_Absoluta Frec_Relativa
1  CHEMO            37     0.6271186
2    ICI            22     0.3728814
[1] "********************************************"
[1] "ECOGPS"
[1] "********************************************"
                                  Grupos Frec_Absoluta Frec_Relativa
1                            ASYMTOMATIC            26    0.44067797
2                               BEDBOUND             1    0.01694915
3           SYMPTOMATIC >50 % IN THE BED             1    0.01694915
4             SYMPTOMATIC BUT AMBULATORY            17    0.28813559
5 SYMPTOMATIC,<50% IN BED DURING THE DAY            14    0.23728814
[1] "********************************************"
[1] "COMORBIDITIES"
[1] "********************************************"
                Grupos Frec_Absoluta Frec_Relativa
1      ATEROESCLEROSIS             3    0.05084746
2   CANCER RRECURRENCE             8    0.13559322
3 CHRONIC INFLAMMATION             9    0.15254237
4    DIABETES MELLITUS            13    0.22033898
5       HYPERLIPIDEMIA            12    0.20338983
6         HYPERTENSION            14    0.23728814
[1] "********************************************"
[1] "CTCNCI"
[1] "********************************************"
                Grupos Frec_Absoluta Frec_Relativa
1         HIGHT SEVERE             4    0.06779661
2             MODERATE            11    0.18644068
3      NO SIDE EFFECTS            28    0.47457627
4               SEVERE            12    0.20338983
5 SLIGTHLY SIDE EFFCTS             4    0.06779661
[1] "********************************************"
[1] "ACTION_TAKEN_"
[1] "********************************************"
                Grupos Frec_Absoluta Frec_Relativa
1 ADDED OTHER TREATMEN             3    0.05084746
2       ADDED STEROIDS             1    0.01694915
3     DOSE NOT CHANGED            21    0.35593220
4         DOSE REDUCED            17    0.28813559
5     DRUG INTERRUPTED             9    0.15254237
6        DRUG WIHDRAWN             1    0.01694915
7               UNKNOW             7    0.11864407

EXPLORACION MULTIVARIADA DEL DATASET¶

Variables numericas vs salida¶

Estadisticos de las variables numericas vs salida¶

In [181]:
# DESCRIPCION MULTIVARIADA: las variables en función de la respuesta
name_y <- as.name(respuesta[1])

descripcion_num <- datos %>%
  group_by(!!name_y)%>%
  dlookr::describe()
descripcion_num
A tibble: 50 × 27
described_variablesSTATUSnnameansdse_meanIQRskewnesskurtosisp00p01p05p10p20p25p30p40p50p60p70p75p80p90p95p99p100
<chr><fct><int><int><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
AGE ALIVE380 68.552631579 8.1562358 1.3231161612.2500000-0.35796368-0.48367838 52.0000000 52.7400000 54.0000000 55.7000000 60.40000000 62.50000000 65.00000000 68.000000000 70.00000000072.0000000074.000000074.7500000 75.6000000 76.3000000 79.0000000 83.4100000 86.0000000
AGE DEATH210 70.904761905 9.4228698 2.0562387814.0000000-0.14495828-0.82993240 53.0000000 53.6000000 56.0000000 60.0000000 63.00000000 64.00000000 67.00000000 68.000000000 70.00000000074.0000000077.000000078.0000000 79.0000000 83.0000000 83.0000000 86.2000000 87.0000000
BMI ALIVE380 22.135789474 4.2896479 0.69587278 5.3525000 0.88974456 1.40256997 15.4300000 15.7297000 16.4355000 17.1050000 18.22400000 18.83250000 20.12300000 20.808000000 21.82500000022.8320000023.579000024.1850000 25.7080000 26.9140000 29.1775000 33.4975000 35.8100000
BMI DEATH210 20.474761905 3.3661545 0.73455514 5.6200000-0.05915208-1.07758686 14.0800000 14.4980000 16.1700000 16.6800000 17.30000000 17.89000000 18.07000000 19.260000000 20.07000000021.7900000023.140000023.5100000 23.7600000 24.4600000 25.2100000 25.6660000 25.7800000
CYCLES_BETWEEN_PET1_PET2 ALIVE380 7.368421053 8.8330493 1.43290981 3.7500000 3.2390467312.32718502 2.0000000 2.0000000 2.0000000 2.0000000 3.00000000 3.00000000 3.00000000 4.000000000 4.000000000 5.00000000 6.0000000 6.7500000 11.2000000 14.0000000 21.3500000 40.9700000 48.0000000
CYCLES_BETWEEN_PET1_PET2 DEATH210 6.523809524 4.0201872 0.87727676 4.0000000 1.58544523 2.70180500 2.0000000 2.0000000 2.0000000 3.0000000 4.00000000 4.00000000 4.00000000 5.000000000 6.000000000 6.00000000 7.0000000 8.0000000 9.0000000 10.0000000 15.0000000 17.4000000 18.0000000
dias ALIVE380 46.842105263137.893520722.3692828923.7500000 4.0656318417.78192579 0.0000000 0.0000000 0.0000000 0.0000000 0.00000000 0.00000000 0.00000000 0.000000000 0.000000000 2.0000000014.900000023.7500000 30.4000000 64.2000000319.6500000597.3600000735.0000000
dias DEATH210 75.809523810127.562384427.8363943986.0000000 1.95190402 3.08842106 0.0000000 0.0000000 0.0000000 0.0000000 0.00000000 0.00000000 0.00000000 3.000000000 7.00000000033.0000000052.000000086.0000000114.0000000233.0000000364.0000000425.6000000441.0000000
DIFF_ALBUMIN ALIVE380 -0.057631579 0.6964948 0.11298638 0.4625000 0.30469934 1.31440247 -1.7000000 -1.6260000 -1.2620000 -0.6430000 -0.48400000 -0.29500000 -0.23700000 -0.168000000 -0.120000000-0.06600000 0.0500000 0.1675000 0.2200000 1.0310000 1.2025000 1.5883000 1.7400000
DIFF_ALBUMIN DEATH210 -0.057619048 0.9679251 0.21121858 0.2500000 0.88964699 4.51502526 -2.2800000 -2.0940000 -1.3500000 -0.9000000 -0.37000000 -0.17000000 -0.16000000 -0.110000000 -0.060000000-0.05000000 0.0500000 0.0800000 0.1000000 0.2000000 1.5700000 2.5780000 2.8300000
DIFF_ALT ALIVE380 -5.223684211 15.9310412 2.5843561410.2250000-3.3855993816.87827941 -84.7000000 -62.2040000 -19.9900000 -15.1500000-10.08000000 -9.20000000 -6.67000000 -4.640000000 -2.250000000-1.10000000 0.5500000 1.0250000 2.0400000 5.2300000 13.7400000 17.7450000 18.3000000
DIFF_ALT DEATH210-17.208571429 57.142467112.46950861 9.4200000-3.8228471615.51173073-250.8000000-217.6200000 -84.9000000 -16.9000000 -9.20000000 -8.80000000 -8.60000000 -6.000000000 -2.600000000 0.00000000 0.4000000 0.6200000 3.3000000 7.5000000 14.3000000 18.3800000 19.4000000
DIFF_AST ALIVE380 -4.778947368 19.2680124 3.12568435 9.0250000-4.7830627426.77840367-111.5000000 -77.5340000 -18.8500000 -12.6100000 -7.14000000 -6.55000000 -5.51000000 -3.120000000 -0.700000000 0.02000000 1.1400000 2.4750000 4.3600000 5.0600000 7.3050000 14.8300000 18.9000000
DIFF_AST DEATH210-11.800476190 49.721023810.8500169112.0000000-4.0340014717.16036256-219.6000000-186.8600000 -55.9000000 -11.6000000 -5.80000000 -5.50000000 -3.50000000 -1.100000000 -0.800000000 0.00000000 1.7000000 6.5000000 6.6000000 11.9000000 13.2000000 15.1200000 15.6000000
DIFF_BGL ALIVE380 -3.315789474 43.2723026 7.0196944327.5000000-1.63834398 4.74218790-160.0000000-144.4600000 -77.2000000 -45.5000000-13.60000000-12.75000000-10.70000000 -4.600000000 2.000000000 9.4000000012.000000014.7500000 19.6000000 30.4000000 64.0000000 67.7800000 70.0000000
DIFF_BGL DEATH210 2.428571429 22.6728283 4.9476167623.0000000-1.62777362 4.23786443 -70.0000000 -61.4000000 -27.0000000 -18.0000000 -6.00000000 -6.00000000 -2.00000000 -1.000000000 1.00000000014.0000000016.000000017.0000000 17.0000000 29.0000000 29.0000000 29.8000000 30.0000000
DIFF_BM_UPTAKE ALIVE380 0.100000000 0.6192498 0.10045558 0.7750000 0.47868759 1.70780206 -1.2000000 -1.1297000 -0.8315000 -0.6480000 -0.34800000 -0.28000000 -0.19800000 -0.028000000 0.095000000 0.20200000 0.4390000 0.4950000 0.5740000 0.7210000 0.8490000 1.6434000 2.0800000
DIFF_BM_UPTAKE DEATH210 0.120000000 0.5834724 0.12732411 0.6000000 0.97872356 2.56877396 -0.8200000 -0.8000000 -0.7200000 -0.4500000 -0.40000000 -0.21000000 -0.19000000 0.010000000 0.200000000 0.24000000 0.3500000 0.3900000 0.4100000 0.7000000 0.7400000 1.6120000 1.8300000
DIFF_BMLR ALIVE380 0.005877421 0.2736999 0.04439999 0.3242798 0.06548622 0.10607508 -0.6192344 -0.5805216 -0.3880261 -0.3188116 -0.20431746 -0.16686314 -0.13508456 -0.064767496 0.006350011 0.07472226 0.1402702 0.1574167 0.1849698 0.3389879 0.4989430 0.5797461 0.6197762
DIFF_BMLR DEATH210 0.072130571 0.3516416 0.07673450 0.3824610 0.42492308 1.98629788 -0.6207071 -0.6042323 -0.5383333 -0.3211071 -0.09719522 -0.09373885 -0.07072829 0.002627451 0.081944444 0.09726844 0.2001239 0.2887222 0.2893665 0.3768175 0.4389387 0.9038135 1.0200322
DIFF_BW ALIVE380 0.142105263 6.2442030 1.01294348 6.6500000 0.72112651 0.63190779 -11.1000000 -10.9890000 -8.5050000 -6.3200000 -4.86000000 -3.60000000 -2.38000000 -1.300000000 -0.550000000 0.00000000 0.8000000 3.0500000 4.5600000 9.0200000 12.6350000 15.4150000 15.6000000
DIFF_BW DEATH210 8.172380952 11.3566201 2.47821767 4.4000000 1.94716942 4.16313988 -4.3000000 -3.9000000 -2.3000000 -1.5000000 0.00000000 3.10000000 3.60000000 4.600000000 5.300000000 5.90000000 6.9000000 7.5000000 8.7000000 24.1000000 26.2000000 40.4560000 44.0200000
DIFF_CRP ALIVE380 0.586578947 4.5745998 0.74209809 0.4450000 2.28771118 7.20120744 -7.2300000 -6.9155000 -4.2550000 -3.0580000 -0.52000000 -0.33750000 -0.10000000 -0.052000000 -0.010000000 0.02000000 0.0400000 0.1075000 0.1800000 4.6960000 8.6990000 16.8606000 17.8300000
DIFF_CRP DEATH210 -0.123809524 4.1593082 0.90763546 1.1800000-2.6547980211.12447049 -15.8800000 -13.2320000 -2.6400000 -2.0300000 -0.22000000 -0.15000000 -0.02000000 0.030000000 0.250000000 0.32000000 0.3900000 1.0300000 1.1900000 2.9600000 3.0200000 6.3720000 7.2100000
DIFF_eGFR ALIVE380 -1.621052632 15.5613374 2.5243822614.5000000 1.01248285 2.01317072 -30.1000000 -28.2500000 -24.2500000 -17.3400000-12.98000000-10.22500000 -9.38000000 -7.820000000 -3.150000000 0.08000000 2.8900000 4.2750000 8.0200000 14.9400000 25.2900000 42.7420000 47.7000000
DIFF_eGFR DEATH210 -4.085714286 18.1665430 3.9642646917.0000000 0.37991024 1.92431183 -39.2000000 -37.9000000 -32.7000000 -26.4000000-19.60000000-10.20000000 -6.30000000 -4.600000000 -2.600000000-0.90000000 3.8000000 6.8000000 7.5000000 9.5000000 9.9000000 38.2200000 45.3000000
DIFF_ESTIMATED_SPLEEN_VOLALIVE380 6.955263158 55.4416796 8.9938280546.5750000 0.38060194 1.62630342-110.2000000-101.8380000 -85.3900000 -63.1900000-42.22000000-15.85000000 -1.72000000 3.800000000 8.60000000018.2000000026.260000030.7250000 38.3600000 59.4800000 98.8900000148.2880000177.0000000
DIFF_ESTIMATED_SPLEEN_VOLDEATH210 11.623809524 39.4728502 8.6136821037.1000000-0.72159619 1.29425090 -85.5000000 -82.1000000 -68.5000000 -20.2000000 -5.60000000 -3.10000000 -2.40000000 2.400000000 15.10000000024.0000000033.100000034.0000000 34.2000000 55.2000000 65.2000000 80.4000000 84.2000000
DIFF_HB ALIVE380 0.169473684 2.1120644 0.34262208 2.8250000 0.51856836 0.08700801 -3.5000000 -3.3890000 -2.6900000 -2.2300000 -1.82000000 -1.45000000 -0.89000000 -0.420000000 -0.100000000 0.44000000 1.1800000 1.3750000 1.7600000 2.8820000 3.7150000 5.0600000 5.8000000
DIFF_HB DEATH210 -0.576190476 2.1030703 0.45892757 2.1000000 0.04091279 0.50400745 -5.2000000 -4.8600000 -3.5000000 -2.8000000 -1.60000000 -1.50000000 -1.20000000 -1.100000000 -0.900000000-0.60000000 0.4000000 0.6000000 0.9000000 1.9000000 2.4000000 3.6800000 4.0000000
DIFF_K ALIVE380 0.107894737 0.3612004 0.05859444 0.4125000 0.14853719-0.08333286 -0.6700000 -0.6293000 -0.4835000 -0.2810000 -0.18200000 -0.10000000 -0.06600000 0.048000000 0.075000000 0.14000000 0.2390000 0.3125000 0.3600000 0.6170000 0.7345000 0.8356000 0.8800000
DIFF_K DEATH210 0.220476190 0.4896067 0.10684095 0.5100000-0.52745476 0.22208083 -0.9000000 -0.8480000 -0.6400000 -0.3400000 -0.01000000 0.00000000 0.00000000 0.140000000 0.250000000 0.44000000 0.4800000 0.5100000 0.5900000 0.7300000 0.9100000 1.0380000 1.0700000
DIFF_LDH ALIVE380 -9.547368421 95.251141015.4517754552.2500000-2.30390086 5.50679173-333.0000000-318.5700000-274.4500000 -70.5000000-23.20000000-14.50000000-11.30000000 0.400000000 8.50000000017.0000000029.500000037.7500000 43.4000000 67.6000000 86.7500000 98.5600000103.0000000
DIFF_LDH DEATH210 9.428571429170.024872137.1024688841.0000000-0.38842221 6.26609952-510.0000000-436.0000000-140.0000000 -65.0000000-23.00000000 -9.00000000 -8.00000000 -4.000000000 5.000000000 5.0000000018.000000032.0000000 34.0000000170.0000000194.0000000416.4000000472.0000000
DIFF_LIVER_UPTAKE ALIVE380 0.094736842 0.5093755 0.08263162 0.7125000-1.03381469 1.86159126 -1.5900000 -1.3162000 -0.7225000 -0.4950000 -0.25800000 -0.21500000 -0.09800000 0.018000000 0.120000000 0.27800000 0.4580000 0.4975000 0.5500000 0.6160000 0.6785000 0.8652000 0.8800000
DIFF_LIVER_UPTAKE DEATH210 -0.121428571 0.4632201 0.10108291 0.6400000 0.13873995-0.73626526 -0.8700000 -0.8640000 -0.8400000 -0.7400000 -0.38000000 -0.37000000 -0.35000000 -0.320000000 -0.160000000-0.03000000 0.1900000 0.2700000 0.3000000 0.5100000 0.5200000 0.7200000 0.7700000
DIFF_PLT ALIVE380-26.710526316121.706760519.7434436687.7500000 0.20395211 2.11887063-354.0000000-309.2300000-229.6000000-166.2000000-89.80000000-77.25000000-60.90000000-56.400000000-18.000000000-8.00000000 3.600000010.5000000 19.2000000 96.5000000180.8000000288.6200000316.0000000
DIFF_PLT DEATH210 11.047619048141.208525330.8142264858.0000000 1.31681596 5.07388455-258.0000000-244.2000000-189.0000000-151.0000000-40.00000000-24.00000000-12.00000000 -9.000000000 4.00000000016.0000000031.000000034.0000000 44.0000000 79.0000000197.0000000409.8000000463.0000000
DIFF_RBC ALIVE380 -0.280000000 0.7164439 0.11622255 0.9000000-0.04813573-0.38757697 -1.9300000 -1.7006000 -1.3015000 -1.2480000 -0.88400000 -0.76750000 -0.67600000 -0.452000000 -0.260000000 0.03200000 0.0970000 0.1325000 0.1640000 0.8020000 0.8475000 1.0538000 1.1500000
DIFF_RBC DEATH210 -0.367142857 0.6828627 0.14901285 0.6500000-0.40499884 0.30518589 -1.9700000 -1.8640000 -1.4400000 -1.1100000 -0.86000000 -0.77000000 -0.52000000 -0.410000000 -0.290000000-0.28000000-0.1900000-0.1200000 0.1300000 0.5900000 0.6300000 0.6940000 0.7100000
DIFF_SLR ALIVE380 0.012379763 0.1893314 0.03071361 0.2242893 0.39905069 0.35975745 -0.3187647 -0.3172185 -0.2956006 -0.2349369 -0.15802676 -0.11211673 -0.09112568 -0.029369579 0.015962444 0.05086431 0.0870160 0.1121725 0.1251329 0.2291428 0.3278227 0.4737786 0.5090979
DIFF_SLR DEATH210 -0.014920417 0.2696536 0.05884323 0.2990705 0.40533483 1.41268415 -0.5135982 -0.5113946 -0.5025806 -0.2566667 -0.19362704 -0.13682278 -0.13516976 -0.101647059 -0.045342594 0.03066528 0.1359975 0.1622477 0.2076389 0.2268707 0.2537146 0.6017427 0.6887497
DIFF_SPLEEN_UPTAKE ALIVE380 0.042105263 0.3802855 0.06169046 0.3600000 0.11739142 1.12480748 -0.8000000 -0.7482000 -0.6345000 -0.4380000 -0.18400000 -0.12000000 -0.07900000 -0.010000000 0.055000000 0.15800000 0.2180000 0.2400000 0.2840000 0.4180000 0.6175000 0.9561000 1.1300000
DIFF_SPLEEN_UPTAKE DEATH210 0.066190476 0.3390940 0.07399638 0.3500000 1.30365796 3.95311040 -0.4200000 -0.4180000 -0.4100000 -0.4000000 -0.14000000 -0.13000000 -0.07000000 -0.050000000 0.050000000 0.10000000 0.2000000 0.2200000 0.2700000 0.3400000 0.3400000 0.9720000 1.1300000
DIFF_WBC ALIVE380 -0.468421053 2.6410159 0.42842935 3.9525000 0.27158719-0.72429442 -5.7000000 -5.3596000 -3.9215000 -3.1980000 -2.67600000 -2.42750000 -2.14500000 -1.656000000 -0.855000000-0.16000000 1.1160000 1.5250000 1.8840000 3.5310000 3.9410000 4.4127000 4.5200000
DIFF_WBC DEATH210 0.355238095 2.8624982 0.62464833 3.3500000 0.85520225 1.17189325 -4.6400000 -4.2480000 -2.6800000 -2.6700000 -1.94000000 -1.29000000 -1.23000000 -0.360000000 0.100000000 0.38000000 1.1300000 2.0600000 2.3700000 3.2200000 5.4800000 7.2880000 7.7400000
OVERALL_TIME ALIVE380 17.763157895 16.2537883 2.6367126316.5000000 1.49609275 1.96731456 1.0000000 1.0000000 1.8500000 2.7000000 4.40000000 6.00000000 7.20000000 11.000000000 14.50000000015.2000000020.000000022.5000000 25.8000000 41.0000000 51.9500000 64.2600000 65.0000000
OVERALL_TIME DEATH210 21.523809524 15.1480000 3.3055646117.0000000 1.30493144 1.91970514 1.0000000 1.6000000 4.0000000 8.0000000 12.00000000 12.00000000 13.00000000 16.000000000 17.00000000021.0000000023.000000029.0000000 30.0000000 42.0000000 47.0000000 60.6000000 64.0000000
TIME_BETWEE_PET ALIVE380 10.578947368 8.9703162 1.4551774311.5000000 1.40520467 1.58539181 1.0000000 1.3700000 2.0000000 2.7000000 4.00000000 4.00000000 5.00000000 6.000000000 7.500000000 9.0000000011.800000015.5000000 18.2000000 22.2000000 26.2000000 36.1500000 38.0000000
TIME_BETWEE_PET DEATH210 7.904761905 3.9484777 0.86162847 7.0000000-0.18573481-0.95771678 1.0000000 1.2000000 2.0000000 3.0000000 4.00000000 4.00000000 5.00000000 8.000000000 9.000000000 9.0000000010.000000011.0000000 11.0000000 12.0000000 13.0000000 14.6000000 15.0000000

Diagrama de dispersion por pareja¶

In [178]:
# Diagrama de dispersion por pareja

options(repr.plot.width=8, repr.plot.height=5)
var_respuesta <- as.name(respuesta)

for (i in 1:length(var_num)) {
  name_i <- as.name(var_num[i])
  r <- list()
  
    r[[1]] <- ggplot(datos, aes(x=!!name_i, y=!!var_respuesta)) +
      geom_point() +
      geom_smooth(method=lm ,formula = y ~ x, color="red", fill="#69b3a2", se=TRUE)  +
    theme_minimal()
  
  grid.arrange(r[[1]],
               nrow=1, ncol = 1,
               top = textGrob(var_num[i],gp=gpar(fontsize=16,font=3)))
}

Histograma de densidad + boxplot + diagrama de error + violin plot de variables numericas vs respuesta¶

In [28]:
options(repr.plot.width=16, repr.plot.height=8)

# boxplot + diagrama de error + histograma de densidad + violin plot
for (i in 1:length(var_num)) {
  name_i <- as.name(var_num[i])
  name_y <- as.name(respuesta[1])
  
  r <- list()
  
  # Histograma de densidad
  r[[1]]<-ggplot(datos, aes(x=!!name_i, fill=!!name_y)) +
    geom_density(alpha=0.4)+
    theme_light() +
    #theme(legend.position = "none") +
    xlab("") +
    ylab("Densidad de Frecuencia")
  
  # Boxplot
  r[[2]]<-ggplot(data=datos, aes(y=!!name_i, x=!!name_y)) +
    geom_boxplot(size = 0.4) +
    theme_light() +
    theme(legend.position = "none")+
    xlab("")
  
  # Violin plot con diagramas de error
  r[[3]]<-ggerrorplot(data = datos, x = respuesta, y = var_num[i], 
                      desc_stat = "mean_ci",
                      error.plot = "errorbar",
                      add = c("violin","mean"))+
    theme_light() +
    theme(legend.position = "none") +
    xlab("") +
    ylab("") +
    #ggtitle("Comparacion pvut") +
    stat_compare_means(comparisons = c("0","1"))+ 
    stat_compare_means(label.y = 1.05*max(datos[,var_num[i]]))                 
  
  grid.arrange(r[[1]], r[[2]], r[[3]],
               nrow=3, ncol = 1,
               top = textGrob(var_num[i],gp=gpar(fontsize=16,font=3)))
}

Variables cualitativas vs salida¶

Graficos de barras de todas las variables cualitativas vs respuesta¶

In [185]:
name_y <- as.name(respuesta[1])

datos %>%
  group_by(!!name_y) %>%
  plot_bar_category()

Grafico de barras de los efectos secundarios por tratamiento¶

In [121]:
# EFECTOS SECUNDARIOS POR TRATAMIENTO(podemos hacer todas las combinaciones posibles)
var_cual_x = "CTCNCI"
var_cual_y = "TREATMENT"

name_x = as.name(var_cual_x)
name_y = as.name(var_cual_y)

ggbarstats(datos, x = !!name_x, y = !!name_y)

Grafico apilado de accion del medico por tratamiento¶

In [138]:
# ACCION DEL MEDICO POR TRATAMIENTO
var_cual_x = "TREATMENT"
var_cual_y = "ACTION_TAKEN_"

name_x = as.name(var_cual_x)
name_y = as.name(var_cual_y)


ggbarstats(datos, x = !!name_x, y = !!name_y)

Grafico comparativo de los efectos adversos con el tratamiento,junto a las acciones tomadas y ciclos entre Pet1 y Pet2¶

In [149]:
# Grafico comparativo de : efectos adversos con el tratamiento,junto a las acciones tomadas y el ciclo 
ggplot(datos, aes(x =ACTION_TAKEN_, y = CYCLES_BETWEEN_PET1_PET2))+
  geom_bar(
    aes(fill =CTCNCI ), stat = "identity", color = "white",
    position = position_dodge(0.9)
    )+
  facet_wrap(~TREATMENT) + 
  fill_palette("jco")

Grafico comparativo de los pacientes con efectos adversos y la accion del medico segun el tratamiento¶

In [150]:
# GRAFICO COMPARATIVO DE PACIENTES CON EFECTOS ADVERSOS SEGUN EL TRATAMIENTO Y ACCION DEL MEDICO
ggplot(datos) +
  geom_bar(aes(x=ACTION_TAKEN_, fill=CTCNCI),
           position = "dodge") +
  facet_wrap(~TREATMENT)

Histograma de densidad de DIFF_LIVER_UPTAKE vs tratamiento¶

In [124]:
# Histograma de densidad de DIFF_LIVER_UPTAKE por tratamiento:
var_x = 'DIFF_LIVER_UPTAKE'
name_x = as.name(var_x)
var_grupo = 'TREATMENT'
name_grupo = as.name(var_grupo)

options(warn=-1)
# With transparency (right)
p2 <- ggplot(data=datos, aes(x=!!name_x, group=!!name_grupo, fill=!!name_grupo)) +
    geom_density(adjust=1.5, alpha=.4) +
    theme_ipsum()
p2

ggplotly(p2)

options(warn=0)

Mosaic plot + Chi cuadrado de las variables cualitativas vs respuesta¶

In [186]:
# Mosaic plot y chi cuadrado de las variables cualitativas segun la variable respuesta:
r <- list()
for (i in 1:length(var_cual)) {
    # Variable de salida o respuesta
    name_y <- as.name(respuesta[1])

    # Variable de entrada
    name_i <- as.name(var_cual[i])


    # Tabla de contingencias de una variable con la target
    categ <- target_by(datos, !!name_y)
    cat_cat <- relate(categ, !!name_i)
    r[[i]]<-as.data.frame.matrix(cat_cat)
    
    # Chi cuadrado
    print(summary(cat_cat))
    
    # Mosaico de una variable con la target
    grid.arrange(plot(cat_cat),nrow=1, ncol = 1)
    
}
r
Call: xtabs(formula = formula_str, data = data, addNA = TRUE)
Number of cases in table: 59 
Number of factors: 2 
Test for independence of all factors:
	Chisq = 1.4811, df = 1, p-value = 0.2236
Call: xtabs(formula = formula_str, data = data, addNA = TRUE)
Number of cases in table: 59 
Number of factors: 2 
Test for independence of all factors:
	Chisq = 9.495, df = 3, p-value = 0.02338
	Chi-squared approximation may be incorrect
Call: xtabs(formula = formula_str, data = data, addNA = TRUE)
Number of cases in table: 59 
Number of factors: 2 
Test for independence of all factors:
	Chisq = 12.737, df = 9, p-value = 0.1749
	Chi-squared approximation may be incorrect
Call: xtabs(formula = formula_str, data = data, addNA = TRUE)
Number of cases in table: 59 
Number of factors: 2 
Test for independence of all factors:
	Chisq = 0.009083, df = 1, p-value = 0.9241
Call: xtabs(formula = formula_str, data = data, addNA = TRUE)
Number of cases in table: 59 
Number of factors: 2 
Test for independence of all factors:
	Chisq = 5.748, df = 4, p-value = 0.2188
	Chi-squared approximation may be incorrect
Call: xtabs(formula = formula_str, data = data, addNA = TRUE)
Number of cases in table: 59 
Number of factors: 2 
Test for independence of all factors:
	Chisq = 5.938, df = 5, p-value = 0.3123
	Chi-squared approximation may be incorrect
Call: xtabs(formula = formula_str, data = data, addNA = TRUE)
Number of cases in table: 59 
Number of factors: 2 
Test for independence of all factors:
	Chisq = 2.7546, df = 4, p-value = 0.5997
	Chi-squared approximation may be incorrect
Call: xtabs(formula = formula_str, data = data, addNA = TRUE)
Number of cases in table: 59 
Number of factors: 2 
Test for independence of all factors:
	Chisq = 13.937, df = 6, p-value = 0.03035
	Chi-squared approximation may be incorrect
  1. A data.frame: 2 × 2
    FEMALEMALE
    <int><int>
    ALIVE1523
    DEATH 516
  2. A data.frame: 2 × 4
    IIIIIIIV
    <int><int><int><int>
    ALIVE181910
    DEATH57 7 2
  3. A data.frame: 2 × 10
    EWING SARCOMAGASTRIC CANCERGINECOLOGICALHEAD AND NECKLUNG CANCERMELANOMAPANCREAS CANCERRENAL CANCERSARCOMAUROTHELIAL CARCINOMA
    <int><int><int><int><int><int><int><int><int><int>
    ALIVE20661424301
    DEATH0314 533110
  4. A data.frame: 2 × 2
    CHEMOICI
    <int><int>
    ALIVE2414
    DEATH13 8
  5. A data.frame: 2 × 5
    ASYMTOMATICBEDBOUNDSYMPTOMATIC >50 % IN THE BEDSYMPTOMATIC BUT AMBULATORYSYMPTOMATIC,<50% IN BED DURING THE DAY
    <int><int><int><int><int>
    ALIVE1901810
    DEATH 7109 4
  6. A data.frame: 2 × 6
    ATEROESCLEROSISCANCER RRECURRENCECHRONIC INFLAMMATIONDIABETES MELLITUSHYPERLIPIDEMIAHYPERTENSION
    <int><int><int><int><int><int>
    ALIVE2359712
    DEATH15445 2
  7. A data.frame: 2 × 5
    HIGHT SEVEREMODERATENO SIDE EFFECTSSEVERESLIGTHLY SIDE EFFCTS
    <int><int><int><int><int>
    ALIVE262172
    DEATH25 752
  8. A data.frame: 2 × 7
    ADDED OTHER TREATMENADDED STEROIDSDOSE NOT CHANGEDDOSE REDUCEDDRUG INTERRUPTEDDRUG WIHDRAWNUNKNOW
    <int><int><int><int><int><int><int>
    ALIVE30179306
    DEATH01 48611

DEFINIMOS LOS DATAFRAMES DE ENTRADA Y SALIDA¶

In [187]:
# Voy a definir el data frame de entrada: X
X = datos[-ncol(datos)]
head(X)

# Voy a definir la variable de salida:
y = as.factor(as.vector(unlist(datos[,respuesta])))
head(y)
A tibble: 6 × 33
STATUSCYCLES_BETWEEN_PET1_PET2GENDERAGETNM_STAGEDIFF_WBCDIFF_RBCDIFF_HBDIFF_PLTDIFF_CRPDIFF_ALBUMINDIFF_LDHDIFF_eGFRDIFF_ASTDIFF_ALTDIFF_KDIFF_BGLBMIDIFF_BWDIFF_SPLEEN_UPTAKEDIFF_BM_UPTAKEDIFF_LIVER_UPTAKEDIFF_ESTIMATED_SPLEEN_VOLDIAGNOSTICTREATMENTECOGPSCOMORBIDITIESCTCNCIACTION_TAKEN_TIME_BETWEE_PETdiasDIFF_SLRDIFF_BMLR
<fct><dbl><fct><dbl><fct><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><fct><fct><fct><fct><fct><fct><dbl><dbl><dbl><dbl>
ALIVE3MALE 54III-1.83-0.64-2.20 -82-0.56 0.98-271.0 23.7 0.313.2 0.10 1222.90 -4.0 0.02 0.12 0.44-80.9GINECOLOGICALICISYMPTOMATIC BUT AMBULATORY HYPERLIPIDEMIA SEVERE DRUG INTERRUPTED2 25 0.029335236 0.10905937
ALIVE2FEMALE79IV -3.08 0.14 0.70-159-3.80 1.33 22.0 -0.2 4.4 5.3 0.58-1416.47 0.3-0.20-0.60-0.30 6.2HEAD AND NECKICIASYMTOMATIC HYPERTENSION NO SIDE EFFECTSDRUG INTERRUPTED4 0-0.156566790-0.04688961
ALIVE3MALE 60III-3.18-0.50 3.54 22-0.06-0.20 64.2 4.6-2.6 1.1 0.33 1220.15 -8.1-0.12 0.34 0.27 9.1LUNG CANCER ICISYMPTOMATIC,<50% IN BED DURING THE DAYDIABETES MELLITUS MODERATE DOSE REDUCED 4 14-0.044698028-0.36568856
ALIVE3MALE 76II -1.28 0.79 1.70 9 0.04-0.12 -55.0 -8.9-0.7-6.7 0.05 -422.83 0.0 0.46-0.40 0.59-38.8RENAL CANCER ICIASYMTOMATIC HYPERTENSION NO SIDE EFFECTSDOSE NOT CHANGED6 0-0.002886671 0.13061297
ALIVE5FEMALE70II 0.00-0.09-0.40 -95-7.23 0.05 6.0-13.2 2.7 2.2 0.06 -818.29 -0.1-0.34-0.80-0.48-87.6MELANOMA ICISYMPTOMATIC BUT AMBULATORY DIABETES MELLITUS NO SIDE EFFECTSDOSE NOT CHANGED6 0-0.095215760-0.25268025
ALIVE4MALE 54IV 0.99 0.10 0.00 -3 0.02-0.16 -4.0-13.1 0.6 2.8-0.47 -327.18-10.8 0.00-0.10-0.53 34.4LUNG CANCER ICIASYMTOMATIC CANCER RRECURRENCEHIGHT SEVERE DOSE NOT CHANGED6363-0.167591661 0.07251379
  1. ALIVE
  2. ALIVE
  3. ALIVE
  4. ALIVE
  5. ALIVE
  6. ALIVE
Levels:
  1. 'ALIVE'
  2. 'DEATH'

FEATURE SELECTION (SELECCION DE CARACTERISTICAS)¶

  1. Feature selection univariada ( ANOVA/Kruskall Wallis = Cuantitativa vs salida o Chi Cuadrado = Cualitativas vs salida, MI = Mutual information)
  • Quitar las variables con MI = 0
  • Quitar las variables con p-valores >25% = 0.25
  1. Feature selection multivariado:
  • RFE - recursive feature elimination RANDOM FOREST( para pocos datos como este dataset lo probe y no funciona bien)
  • Modelo logístico (caso binario) mejor modelo según el BIC. --> con pocos datos y pocas variables(para este caso seleccionando las variables importantes)

Feature selection univariado: cuantitativas vs la salida¶

Wilcoxon rank test¶

In [29]:
#options(warn=-1)
## Multiple Wilcoxon rank sum tests - casos binarios
datos$STATUS <- as.numeric(datos$STATUS)
Wilk_p <- as.data.frame(t(as.data.frame(lapply(datos[,var_num], function(x) wilcox.test(x ~ datos$STATUS)$p.value)))) %>%
  arrange(V1)
         
Wilk_p  
#options(warn=-0)
A data.frame: 25 × 1
V1
<dbl>
DIFF_BW0.001071841
DIFF_LIVER_UPTAKE0.052455399
dias0.108429874
DIFF_PLT0.115177643
DIFF_CRP0.151877257
OVERALL_TIME0.168110480
BMI0.205329235
DIFF_HB0.225794623
DIFF_K0.244513380
CYCLES_BETWEEN_PET1_PET20.250401922
DIFF_WBC0.271201503
AGE0.366288338
DIFF_BMLR0.466463400
DIFF_AST0.536942262
DIFF_BGL0.557948763
DIFF_SLR0.669052785
DIFF_RBC0.680615800
TIME_BETWEE_PET0.744938990
DIFF_ALBUMIN0.769606349
DIFF_ESTIMATED_SPLEEN_VOL0.771492426
DIFF_LDH0.806140116
DIFF_eGFR0.924323941
DIFF_SPLEEN_UPTAKE0.974737237
DIFF_BM_UPTAKE0.987368328
DIFF_ALT1.000000000

Kruskall Wallis test¶

In [30]:
## Kruskall wallis rank sum tests - para todos los casos
datos$STATUS <- as.numeric(datos$STATUS)
Kruskal_p <- as.data.frame(t(as.data.frame(lapply(datos[,var_num], function(x) kruskal.test(x ~ datos$STATUS)$p.value)))) %>%
  arrange(V1)
                                               
Kruskal_p
A data.frame: 25 × 1
V1
<dbl>
DIFF_BW0.001042032
DIFF_LIVER_UPTAKE0.051499589
dias0.106574249
DIFF_PLT0.113362723
DIFF_CRP0.149567892
OVERALL_TIME0.165679149
BMI0.202511282
DIFF_HB0.222742163
DIFF_K0.241312102
CYCLES_BETWEEN_PET1_PET20.247127164
DIFF_WBC0.267761696
AGE0.362099048
DIFF_BMLR0.461632677
DIFF_AST0.531723207
DIFF_BGL0.552640237
DIFF_SLR0.663298181
DIFF_RBC0.674817935
TIME_BETWEE_PET0.738942177
DIFF_ALBUMIN0.763549184
DIFF_ESTIMATED_SPLEEN_VOL0.763569297
DIFF_LDH0.800017121
DIFF_eGFR0.918037782
DIFF_SPLEEN_UPTAKE0.968420824
DIFF_BM_UPTAKE0.981053204
DIFF_ALT0.993683966

Anova¶

Ahora hacemos la ANOVA que es el equivalente paramétrico a Kurskall Wallis

In [190]:
## ANOVA - para todos los casos
datos$STATUS <- as.factor(datos$STATUS)
ANOVA_p<-as.data.frame(t(as.data.frame(lapply(datos[,var_num], function(x) anova(lm(x ~ datos$STATUS))$`Pr(>F)` [1])))) %>%
  arrange(V1)
                                              
ANOVA_p
A data.frame: 25 × 1
V1
<dbl>
DIFF_BW0.0008674499
DIFF_LIVER_UPTAKE0.1128440197
BMI0.1313002214
DIFF_HB0.1987145533
TIME_BETWEE_PET0.2006455075
DIFF_ALT0.2283988495
DIFF_WBC0.2702341457
DIFF_PLT0.2858364966
DIFF_K0.3178280254
AGE0.3199523200
OVERALL_TIME0.3872791867
DIFF_BMLR0.4251613367
dias0.4311242737
DIFF_AST0.4411739211
DIFF_CRP0.5579829726
DIFF_BGL0.5739834861
DIFF_LDH0.5836796913
DIFF_eGFR0.5854167472
DIFF_RBC0.6510598009
DIFF_SLR0.6511375206
CYCLES_BETWEEN_PET1_PET20.6804891959
DIFF_ESTIMATED_SPLEEN_VOL0.7346993354
DIFF_SPLEEN_UPTAKE0.8098195294
DIFF_BM_UPTAKE0.9039677773
DIFF_ALBUMIN0.9999543656

Feature selection univariado: cualitativa vs la salida¶

In [39]:
datos$STATUS<-as.numeric(datos$STATUS)
datos$GENDER <- as.numeric(datos$GENDER)
datos$COMORBIDITIES <- as.numeric(datos$COMORBIDITIES)
datos$CTCNCI <- as.numeric(datos$CTCNCI)
datos$ACTION_TAKEN_ <- as.numeric(datos$ACTION_TAKEN_)
datos$TNM_STAGE <- as.numeric(datos$TNM_STAGE)
datos$DIAGNOSTIC <- as.numeric(datos$DIAGNOSTIC)
datos$TREATMENT <- as.numeric(datos$TREATMENT)
datos$ECOGPS <- as.numeric(datos$ECOGPS)

Chi cuadrado de Pearson¶

In [43]:
res_sig <- rep(0,length(var_cual))

for (i in 1:length(var_cual)){
  chisq <- chisq.test(table(as.factor(as.character(datos[,var_cual[i]])),datos[,'STATUS']),correct = TRUE,simulate.p.value = 10000)
  res_sig[i]<-chisq$p.value
}
df_sig <- data.frame(p_valor=res_sig)
row.names(df_sig) <- colnames(datos[,var_cual])

idx_sig <- sort(df_sig$p_valor,decreasing = FALSE,index.return=TRUE)$ix
df_sig_sort <- data.frame(p_valor=res_sig[idx_sig])
row.names(df_sig_sort) <- colnames(datos[,var_cual])[idx_sig]
df_sig_sort
A data.frame: 8 × 1
p_valor
<dbl>
ACTION_TAKEN_0.01399300
TNM_STAGE0.02348826
DIAGNOSTIC0.13593203
ECOGPS0.18440780
GENDER0.25187406
COMORBIDITIES0.31534233
CTCNCI0.67266367
TREATMENT1.00000000

Filtro univariado Mutual information (MI) - TODOS los casos¶

In [44]:
# Mutual informacion variables cuantitativas:
inf_gain <- information.gain(STATUS~., data = datos)
inf_gain <- inf_gain %>%
  arrange(desc(attr_importance))
inf_gain
A data.frame: 33 × 1
attr_importance
<dbl>
TREATMENT0.4393713
DIFF_BW0.2900315
CYCLES_BETWEEN_PET1_PET20.0000000
GENDER0.0000000
AGE0.0000000
TNM_STAGE0.0000000
DIFF_WBC0.0000000
DIFF_RBC0.0000000
DIFF_HB0.0000000
DIFF_PLT0.0000000
DIFF_CRP0.0000000
DIFF_ALBUMIN0.0000000
DIFF_LDH0.0000000
DIFF_eGFR0.0000000
DIFF_AST0.0000000
DIFF_ALT0.0000000
DIFF_K0.0000000
DIFF_BGL0.0000000
BMI0.0000000
DIFF_SPLEEN_UPTAKE0.0000000
DIFF_BM_UPTAKE0.0000000
DIFF_LIVER_UPTAKE0.0000000
DIFF_ESTIMATED_SPLEEN_VOL0.0000000
DIAGNOSTIC0.0000000
ECOGPS0.0000000
COMORBIDITIES0.0000000
CTCNCI0.0000000
ACTION_TAKEN_0.0000000
TIME_BETWEE_PET0.0000000
dias0.0000000
DIFF_SLR0.0000000
DIFF_BMLR0.0000000
OVERALL_TIME0.0000000

Feature selection multivariado - BIC modelo estadístico - BINARIO¶

In [45]:
row.names(df_sig_sort)[1:3]
  1. 'ACTION_TAKEN_'
  2. 'TNM_STAGE'
  3. 'DIAGNOSTIC'
In [46]:
row.names(Kruskal_p)[1:5]
  1. 'DIFF_BW'
  2. 'DIFF_LIVER_UPTAKE'
  3. 'dias'
  4. 'DIFF_PLT'
  5. 'DIFF_CRP'
In [48]:
datos$STATUS<- as.factor(datos$STATUS)
In [49]:
modelos_5top <-
  glmulti(STATUS~ACTION_TAKEN_+TNM_STAGE+DIAGNOSTIC+DIFF_BW+DIFF_LIVER_UPTAKE+dias+DIFF_PLT+DIFF_CRP, data = datos,
          level = 1,               # No interaction considered
          method = "h",            # Exhaustive approach
          crit = "bic",            # BIC as criteria
          confsetsize = 5,         # Keep 5 best models
          plotty = F, report = F,  # No plot or interim reports
          fitfunction = "glm",     # glm function
          family = binomial)       # binomial family for logistic regression

# Los 5 mejores modelos
print(modelos_5top@formulas)
options(warn = -1)
[[1]]
STATUS ~ 1 + TNM_STAGE + DIFF_BW
<environment: 0x000002432f554a78>

[[2]]
STATUS ~ 1 + DIFF_BW
<environment: 0x000002432f554a78>

[[3]]
STATUS ~ 1 + TNM_STAGE + DIFF_BW + DIFF_LIVER_UPTAKE
<environment: 0x000002432f554a78>

[[4]]
STATUS ~ 1 + TNM_STAGE + DIFF_BW + dias
<environment: 0x000002432f554a78>

[[5]]
STATUS ~ 1 + TNM_STAGE + DIFF_BW + DIFF_PLT
<environment: 0x000002432f554a78>

In [56]:
# El summary de los 5 mejores modelos
summary(modelos_5top@objects[[3]])
Call:
fitfunc(formula = as.formula(x), family = ..1, data = data)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-1.9267  -0.6980  -0.4480   0.8274   1.9488  

Coefficients:
                  Estimate Std. Error z value Pr(>|z|)  
(Intercept)        1.23509    1.02726   1.202   0.2292  
TNM_STAGE         -0.83662    0.37196  -2.249   0.0245 *
DIFF_BW            0.11358    0.04968   2.286   0.0222 *
DIFF_LIVER_UPTAKE -0.86902    0.62713  -1.386   0.1658  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 76.823  on 58  degrees of freedom
Residual deviance: 57.697  on 55  degrees of freedom
AIC: 65.697

Number of Fisher Scoring iterations: 5
In [57]:
# Las variables que aparecen en el mejor modelo son:
var_sel_mod1<-names(modelos_5top@objects[[3]]$coefficients)[-1]
print(var_sel_mod1)
[1] "TNM_STAGE"         "DIFF_BW"           "DIFF_LIVER_UPTAKE"
In [58]:
var_sel_mod1 <- c("TNM_STAGE","DIFF_BW","DIFF_LIVER_UPTAKE")
var_sel_mod1
  1. 'TNM_STAGE'
  2. 'DIFF_BW'
  3. 'DIFF_LIVER_UPTAKE'
In [59]:
# Calculamos el mejor modelo y su accuracy:

# Construimos la formula de forma automatica:
x = paste(var_sel_mod1, collapse = " + ")
y = respuesta
formBIC = as.formula(paste(y, "~", x))
formBIC
STATUS ~ TNM_STAGE + DIFF_BW + DIFF_LIVER_UPTAKE
In [60]:
# Hacemos la matriz de confusion y los estadisticos del modelo
modelo1 <- train(formBIC, data=datos,method='glm')

    print("Matriz de confusión")
    pred <- predict(newdata=datos,modelo1) # prediccion usando el modelo logistico
    real <- as.factor(datos[,respuesta]) # respuesta real
    cm_train_TOT <- caret::confusionMatrix(data=pred,reference=real) # guardamos la matriz de confusion de training
    print(cm_train_TOT) # pintamos la matriz de confusion de training
[1] "Matriz de confusión"
Confusion Matrix and Statistics

          Reference
Prediction  1  2
         1 33 12
         2  5  9
                                          
               Accuracy : 0.7119          
                 95% CI : (0.5792, 0.8224)
    No Information Rate : 0.6441          
    P-Value [Acc > NIR] : 0.1709          
                                          
                  Kappa : 0.3209          
                                          
 Mcnemar's Test P-Value : 0.1456          
                                          
            Sensitivity : 0.8684          
            Specificity : 0.4286          
         Pos Pred Value : 0.7333          
         Neg Pred Value : 0.6429          
             Prevalence : 0.6441          
         Detection Rate : 0.5593          
   Detection Prevalence : 0.7627          
      Balanced Accuracy : 0.6485          
                                          
       'Positive' Class : 1               
                                          

Feature selection Boruta¶

Una de las técnicas más completas que podemos aplicar es la metodología de Boruta.El problema es que el dataset tiene pocas observaciones y para que funcione perfectamente es mejor usar mas,pero lo aplicamos como comparacion.

In [201]:
# Voy a definir el data frame de entrada: X
X = datos[-1]
head(X)

# Voy a definir la variable de salida:
y = as.factor(as.vector(unlist(datos[,respuesta])))
head(y)
A tibble: 6 × 33
CYCLES_BETWEEN_PET1_PET2GENDERAGETNM_STAGEDIFF_WBCDIFF_RBCDIFF_HBDIFF_PLTDIFF_CRPDIFF_ALBUMINDIFF_LDHDIFF_eGFRDIFF_ASTDIFF_ALTDIFF_KDIFF_BGLBMIDIFF_BWDIFF_SPLEEN_UPTAKEDIFF_BM_UPTAKEDIFF_LIVER_UPTAKEDIFF_ESTIMATED_SPLEEN_VOLDIAGNOSTICTREATMENTECOGPSCOMORBIDITIESCTCNCIACTION_TAKEN_TIME_BETWEE_PETdiasDIFF_SLRDIFF_BMLROVERALL_TIME
<dbl><fct><dbl><fct><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><fct><fct><fct><fct><fct><fct><dbl><dbl><dbl><dbl><dbl>
3MALE 54III-1.83-0.64-2.20 -82-0.56 0.98-271.0 23.7 0.313.2 0.10 1222.90 -4.0 0.02 0.12 0.44-80.9GINECOLOGICALICISYMPTOMATIC BUT AMBULATORY HYPERLIPIDEMIA SEVERE DRUG INTERRUPTED2 25 0.029335236 0.10905937 2
2FEMALE79IV -3.08 0.14 0.70-159-3.80 1.33 22.0 -0.2 4.4 5.3 0.58-1416.47 0.3-0.20-0.60-0.30 6.2HEAD AND NECKICIASYMTOMATIC HYPERTENSION NO SIDE EFFECTSDRUG INTERRUPTED4 0-0.156566790-0.0468896150
3MALE 60III-3.18-0.50 3.54 22-0.06-0.20 64.2 4.6-2.6 1.1 0.33 1220.15 -8.1-0.12 0.34 0.27 9.1LUNG CANCER ICISYMPTOMATIC,<50% IN BED DURING THE DAYDIABETES MELLITUS MODERATE DOSE REDUCED 4 14-0.044698028-0.36568856 2
3MALE 76II -1.28 0.79 1.70 9 0.04-0.12 -55.0 -8.9-0.7-6.7 0.05 -422.83 0.0 0.46-0.40 0.59-38.8RENAL CANCER ICIASYMTOMATIC HYPERTENSION NO SIDE EFFECTSDOSE NOT CHANGED6 0-0.002886671 0.1306129712
5FEMALE70II 0.00-0.09-0.40 -95-7.23 0.05 6.0-13.2 2.7 2.2 0.06 -818.29 -0.1-0.34-0.80-0.48-87.6MELANOMA ICISYMPTOMATIC BUT AMBULATORY DIABETES MELLITUS NO SIDE EFFECTSDOSE NOT CHANGED6 0-0.095215760-0.2526802512
4MALE 54IV 0.99 0.10 0.00 -3 0.02-0.16 -4.0-13.1 0.6 2.8-0.47 -327.18-10.8 0.00-0.10-0.53 34.4LUNG CANCER ICIASYMTOMATIC CANCER RRECURRENCEHIGHT SEVERE DOSE NOT CHANGED6363-0.167591661 0.0725137918
  1. 1
  2. 1
  3. 1
  4. 1
  5. 1
  6. 1
Levels:
  1. '1'
  2. '2'
In [202]:
# Determinar la importancia de los atributos.
boruta.model <- Boruta(y~., data = cbind(X,y), doTrace = 2)
 1. run of importance source...

 2. run of importance source...

 3. run of importance source...

 4. run of importance source...

 5. run of importance source...

 6. run of importance source...

 7. run of importance source...

 8. run of importance source...

 9. run of importance source...

 10. run of importance source...

 11. run of importance source...

 12. run of importance source...

After 12 iterations, +0.53 secs: 

 rejected 30 attributes: ACTION_TAKEN_, AGE, BMI, COMORBIDITIES, CTCNCI and 25 more;

 still have 3 attributes left.


 13. run of importance source...

 14. run of importance source...

 15. run of importance source...

 16. run of importance source...

 17. run of importance source...

 18. run of importance source...

 19. run of importance source...

 20. run of importance source...

After 20 iterations, +0.79 secs: 

 confirmed 1 attribute: DIFF_BW;

 still have 2 attributes left.


 21. run of importance source...

 22. run of importance source...

 23. run of importance source...

 24. run of importance source...

 25. run of importance source...

 26. run of importance source...

 27. run of importance source...

 28. run of importance source...

 29. run of importance source...

 30. run of importance source...

 31. run of importance source...

 32. run of importance source...

 33. run of importance source...

 34. run of importance source...

 35. run of importance source...

 36. run of importance source...

 37. run of importance source...

 38. run of importance source...

 39. run of importance source...

 40. run of importance source...

 41. run of importance source...

 42. run of importance source...

 43. run of importance source...

 44. run of importance source...

 45. run of importance source...

 46. run of importance source...

 47. run of importance source...

 48. run of importance source...

 49. run of importance source...

 50. run of importance source...

 51. run of importance source...

 52. run of importance source...

 53. run of importance source...

 54. run of importance source...

 55. run of importance source...

 56. run of importance source...

 57. run of importance source...

 58. run of importance source...

 59. run of importance source...

 60. run of importance source...

 61. run of importance source...

 62. run of importance source...

 63. run of importance source...

 64. run of importance source...

 65. run of importance source...

 66. run of importance source...

 67. run of importance source...

 68. run of importance source...

 69. run of importance source...

 70. run of importance source...

 71. run of importance source...

 72. run of importance source...

 73. run of importance source...

 74. run of importance source...

 75. run of importance source...

 76. run of importance source...

 77. run of importance source...

 78. run of importance source...

 79. run of importance source...

 80. run of importance source...

 81. run of importance source...

 82. run of importance source...

 83. run of importance source...

 84. run of importance source...

 85. run of importance source...

 86. run of importance source...

 87. run of importance source...

 88. run of importance source...

 89. run of importance source...

 90. run of importance source...

After 90 iterations, +2.8 secs: 

 confirmed 1 attribute: TNM_STAGE;

 still have 1 attribute left.


 91. run of importance source...

 92. run of importance source...

 93. run of importance source...

 94. run of importance source...

 95. run of importance source...

 96. run of importance source...

 97. run of importance source...

 98. run of importance source...

 99. run of importance source...

In [203]:
options(repr.plot.width=12, repr.plot.height=8)

print(boruta.model)
plot(boruta.model)
Boruta performed 99 iterations in 3.069594 secs.
 2 attributes confirmed important: DIFF_BW, TNM_STAGE;
 30 attributes confirmed unimportant: ACTION_TAKEN_, AGE, BMI,
COMORBIDITIES, CTCNCI and 25 more;
 1 tentative attributes left: DIFF_CRP;
In [204]:
# Refinar modelo para resolver posibles atributos tentativos.
# Dado que pueden existir atributos no resueltos (tentativos), refinamos el modelo.

boruta.model2 <- TentativeRoughFix(boruta.model)
print(boruta.model2)
plot(boruta.model2)
Boruta performed 99 iterations in 3.069594 secs.
Tentatives roughfixed over the last 99 iterations.
 2 attributes confirmed important: DIFF_BW, TNM_STAGE;
 31 attributes confirmed unimportant: ACTION_TAKEN_, AGE, BMI,
COMORBIDITIES, CTCNCI and 26 more;
In [205]:
# Obtener una lista de los atributos y sus etiquetas con el analisis de Boruta
as.data.frame(boruta.model$finalDecision)
A data.frame: 33 × 1
boruta.model$finalDecision
<fct>
CYCLES_BETWEEN_PET1_PET2Rejected
GENDERRejected
AGERejected
TNM_STAGEConfirmed
DIFF_WBCRejected
DIFF_RBCRejected
DIFF_HBRejected
DIFF_PLTRejected
DIFF_CRPTentative
DIFF_ALBUMINRejected
DIFF_LDHRejected
DIFF_eGFRRejected
DIFF_ASTRejected
DIFF_ALTRejected
DIFF_KRejected
DIFF_BGLRejected
BMIRejected
DIFF_BWConfirmed
DIFF_SPLEEN_UPTAKERejected
DIFF_BM_UPTAKERejected
DIFF_LIVER_UPTAKERejected
DIFF_ESTIMATED_SPLEEN_VOLRejected
DIAGNOSTICRejected
TREATMENTRejected
ECOGPSRejected
COMORBIDITIESRejected
CTCNCIRejected
ACTION_TAKEN_Rejected
TIME_BETWEE_PETRejected
diasRejected
DIFF_SLRRejected
DIFF_BMLRRejected
OVERALL_TIMERejected
In [206]:
# Obtener una lista de los atributos importantes y tentativos
opc_boruta = getSelectedAttributes(boruta.model2, withTentative = F)
opc_boruta
  1. 'TNM_STAGE'
  2. 'DIFF_BW'
In [207]:
# Construimos la formula de forma automatica:
x = paste(opc_boruta, collapse = " + ")
y = respuesta
formBoruta = as.formula(paste(y, "~", x))
formBoruta
STATUS ~ TNM_STAGE + DIFF_BW

Curva Roc¶

In [61]:
library(ROCR)
In [114]:
index = sample(1:nrow(datos), size = .100 * nrow(datos))
#index
train = datos[index, ]
test = datos[-index, ]

model = glm(STATUS~ACTION_TAKEN_+TNM_STAGE+DIAGNOSTIC+DIFF_BW+DIFF_LIVER_UPTAKE+dias+DIFF_PLT+DIFF_CRP,data=train, 
            family = binomial(link = "logit"))

pred = predict(model,test,type="response")
pred = prediction(pred,test$STATUS)
perf = performance(pred, "acc")
#plot(perf)

max_ind = which.max(slot(perf, "y.values")[[1]] )
acc = slot(perf, "y.values")[[1]][max_ind]
cutoff = slot(perf, "x.values")[[1]][max_ind]
print(c(accuracy= acc))

perf_cost = performance(pred, "cost")
perf_err = performance(pred, "err")
perf_tpr = performance(pred, "tpr")
perf_sn_sp = performance(pred, "sens", "spec")

roc = performance(pred,"tpr","fpr")
plot(roc, colorize = T, lwd = 2)
abline(a = 0, b = 1)

auc = performance(pred, measure = "auc")
print(auc@y.values)
 accuracy 
0.7222222 
[[1]]
[1] 0.7022556

CONCLUSIONES¶

El principal problema que nos hemos encontrado es que es un dataset con pocas observaciones.Aunque la estadistica inferencial procura sacar conclusiones generales a traves de una muestra , esta debe ser lo suficientemente significativa para que los distintos test y modelos puedan hacer su trabajo con un minimo de confiabilidad y minuciosidad.
A pesar de ello hemos sacado algunas conclusiones que quiza no sean importantes o relevantes en la medida de lo que queriamos. Pero por otro lado observando con minuciosidad los distintos graficos comparativos si podemos extraer conclusiones mas valiosas.